summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2024-11-25 23:24:26 +0100
committer emkael <emkael@tlen.pl> 2024-11-25 23:24:26 +0100
commit 07dc74d9e73585d840854c642523f0fc05075df8 (patch)
tree a76bf82f6faee22a57bd6834595894161c98700c
parent 373141ac8765c76f06c496a51b320852447c20ba (diff)
Fuck off, Zuck
-rw-r--r-- _cron/fb-cache-files 3
-rw-r--r-- bin/fb-scrape/get-fb-content.py 7
-rw-r--r-- bin/fb-scrape/requirements.txt 2
-rwxr-xr-x bin/refresh-fb-cache.php 8
-rw-r--r-- config/facebook.com_cookies.txt bin 875 -> 1107 bytes
-rw-r--r-- providers/Facebook.php 18
-rw-r--r-- providers/Provider.php 3
7 files changed, 33 insertions, 8 deletions
diff --git a/_cron/fb-cache-files b/_cron/fb-cache-files
index 4e116d3..cbcbba0 100644
--- a/_cron/fb-cache-files
+++ b/_cron/fb-cache-files
@@ -1,3 +1,2 @@
DIRENV_LOG_FORMAT=""
-14 * * * * $SITEPATH/bin/refresh-fb-cache.php
-10 * * * * find $SITEPATH/cache -size 6c
+# 14 */3 * * * $SITEPATH/bin/refresh-fb-cache.php
diff --git a/bin/fb-scrape/get-fb-content.py b/bin/fb-scrape/get-fb-content.py
index 5b22f70..6d6b7ba 100644
--- a/bin/fb-scrape/get-fb-content.py
+++ b/bin/fb-scrape/get-fb-content.py
@@ -8,6 +8,7 @@ from pytz_deprecation_shim import PytzUsageWarning
from random import randint
from facebook_scraper import get_posts, enable_logging
+from facebook_scraper.exceptions import TemporarilyBanned
from requests.exceptions import RequestException
debug = len(sys.argv) > 2 and sys.argv[2] == 'debug'
@@ -21,6 +22,10 @@ warnings.filterwarnings(
)
warnings.filterwarnings(
action='ignore',
+ message=r"Facebook says 'Unsupported Browser'"
+)
+warnings.filterwarnings(
+ action='ignore',
category=PytzUsageWarning
)
@@ -36,7 +41,7 @@ try:
'images': post['images']
})
time.sleep(randint(10, 15))
-except RequestException:
+except (RequestException, TemporarilyBanned):
pass
print(json.dumps(posts))
diff --git a/bin/fb-scrape/requirements.txt b/bin/fb-scrape/requirements.txt
new file mode 100644
index 0000000..6632c5e
--- /dev/null
+++ b/bin/fb-scrape/requirements.txt
@@ -0,0 +1,2 @@
+facebook-scraper
+pytz-deprecation-shim
diff --git a/bin/refresh-fb-cache.php b/bin/refresh-fb-cache.php
index ca50146..6a841de 100755
--- a/bin/refresh-fb-cache.php
+++ b/bin/refresh-fb-cache.php
@@ -13,7 +13,7 @@ $cacheFiles = glob('../cache/facebook.*');
$fileToFetch = NULL;
$oldestCache = PHP_INT_MAX;
-foreach ($cacheFiles as $file) {
+/*foreach ($cacheFiles as $file) {
if (!preg_match('/\.Piwoteka$/', $file)) {
$cacheTime = filemtime($file);
if ($cacheTime < $oldestCache) {
@@ -23,12 +23,12 @@ foreach ($cacheFiles as $file) {
$fileToFetch = implode('.', $fileToFetch);
}
}
-}
+ }*/
-$filesToFetch = ['Piwoteka', 'fermentlodz', $fileToFetch];
+$filesToFetch = ['Piwoteka', 'EtreGourmet']; //, $fileToFetch];
foreach ($filesToFetch as $feed) {
- sleep(rand(60, 90));
+ sleep(rand(90, 120));
$provider = new \Providers\Facebook($feed, ['force' => TRUE]);
if (!$provider->get()) {
throw new \Exception(sprintf('Feed %s failed to fetch!', $feed));
diff --git a/config/facebook.com_cookies.txt b/config/facebook.com_cookies.txt
index 42160a8..8f01e56 100644
--- a/config/facebook.com_cookies.txt
+++ b/config/facebook.com_cookies.txt
Binary files differ
diff --git a/providers/Facebook.php b/providers/Facebook.php
index 20f5028..384c013 100644
--- a/providers/Facebook.php
+++ b/providers/Facebook.php
@@ -26,7 +26,23 @@ class Facebook extends \Providers\Provider {
implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'fb-scrape']) . ' ' .
'python ' . implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'fb-scrape', 'get-fb-content.py']) . ' ' .
escapeshellarg($this->_feed), $jsonContent);
- return json_decode(implode(PHP_EOL, $jsonContent), TRUE);
+ $cacheFile = sprintf($this->_getCachePath(), $this->_feed);
+ if (file_exists($cacheFile)) {
+ $cache = unserialize($this->_getCache($cacheFile));
+ }
+ else {
+ $cache = [];
+ }
+ $fetched = json_decode(implode(PHP_EOL, $jsonContent), TRUE);
+ $cacheIDs = array_map(function($obj) {
+ return $obj['id'];
+ }, $cache);
+ foreach ($fetched as $fetchedItem) {
+ if (!in_array($fetchedItem['id'], $cacheIDs)) {
+ $cache[] = $fetchedItem;
+ }
+ }
+ return $cache;
}
protected function _mapItems($content) {
diff --git a/providers/Provider.php b/providers/Provider.php
index 434b1d1..7e7dbf0 100644
--- a/providers/Provider.php
+++ b/providers/Provider.php
@@ -35,6 +35,9 @@ abstract class Provider {
return unserialize($this->_getCache($cacheFile));
} else {
$content = $this->_fetchItems();
+ if (empty($content) && file_exists($cacheFile)) {
+ return unserialize($this->_getCache($cacheFile));
+ }
file_put_contents($cacheFile, serialize($content));
$this->_cacheTime = time();
return $content;