summary refs log tree commit diff
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2022-01-31 21:39:23 +0100
committer emkael <emkael@tlen.pl> 2022-01-31 21:39:23 +0100
commit 9601f3be1ee5be711671a922c3282339aeb1ddf4 (patch)
tree 7b402675b2029e49b5756f59c2e5ef77db8186cc
parent 489816e3573afff315f61292ba2e1afc2fe96a16 (diff)
Another round of FB crap
-rw-r--r-- _cron/fb-cache-files 3
-rw-r--r-- bin/fb-scrape/.gitignore 2
-rw-r--r-- bin/fb-scrape/get-fb-content.py 42
-rw-r--r-- bin/get-fb-content.py 20
-rwxr-xr-x bin/refresh-fb-cache.php 2
-rw-r--r-- config/facebook.com_cookies.txt bin 1154 -> 875 bytes
-rw-r--r-- providers/Facebook.php 19
7 files changed, 59 insertions, 29 deletions
diff --git a/_cron/fb-cache-files b/_cron/fb-cache-files
index c6b2cb1..4e116d3 100644
--- a/_cron/fb-cache-files
+++ b/_cron/fb-cache-files
@@ -1,2 +1,3 @@
-*/10 * * * * $SITEPATH/bin/refresh-fb-cache.php
+DIRENV_LOG_FORMAT=""
+14 * * * * $SITEPATH/bin/refresh-fb-cache.php
10 * * * * find $SITEPATH/cache -size 6c
diff --git a/bin/fb-scrape/.gitignore b/bin/fb-scrape/.gitignore
new file mode 100644
index 0000000..772944e
--- /dev/null
+++ b/bin/fb-scrape/.gitignore
@@ -0,0 +1,2 @@
+.envrc
+.direnv
diff --git a/bin/fb-scrape/get-fb-content.py b/bin/fb-scrape/get-fb-content.py
new file mode 100644
index 0000000..5b22f70
--- /dev/null
+++ b/bin/fb-scrape/get-fb-content.py
@@ -0,0 +1,42 @@
+import json
+import logging
+import sys
+import time
+import warnings
+from os import path
+from pytz_deprecation_shim import PytzUsageWarning
+from random import randint
+
+from facebook_scraper import get_posts, enable_logging
+from requests.exceptions import RequestException
+
+debug = len(sys.argv) > 2 and sys.argv[2] == 'debug'
+
+if debug:
+ enable_logging(logging.DEBUG)
+
+warnings.filterwarnings(
+ action='ignore',
+ message=r'A low page limit'
+)
+warnings.filterwarnings(
+ action='ignore',
+ category=PytzUsageWarning
+)
+
+BASEDIR = path.dirname(__file__)
+
+posts = []
+try:
+ for post in get_posts(sys.argv[1], pages=2, cookies=path.join(BASEDIR, '../../config/facebook.com_cookies.txt')):
+ posts.append({
+ 'id': post['post_id'],
+ 'time': str(post['time']),
+ 'texts': [t.strip() for t in post['text'].split('\n') if t] if post['text'] else [],
+ 'images': post['images']
+ })
+ time.sleep(randint(10, 15))
+except RequestException:
+ pass
+
+print(json.dumps(posts))
diff --git a/bin/get-fb-content.py b/bin/get-fb-content.py
deleted file mode 100644
index 180c7b4..0000000
--- a/bin/get-fb-content.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import json
-import logging
-import sys
-from os import path
-
-from facebook_scraper import get_posts
-
-
-BASEDIR = path.dirname(__file__)
-
-posts = []
-for post in get_posts(sys.argv[1], cookies=path.join(BASEDIR, '../config/facebook.com_cookies.txt'), pages=3):
- posts.append({
- 'id': post['post_id'],
- 'time': str(post['time']),
- 'texts': [t.strip() for t in post['text'].split('\n') if t],
- 'images': post['images']
- })
-
-print(json.dumps(posts))
diff --git a/bin/refresh-fb-cache.php b/bin/refresh-fb-cache.php
index 4b702ef..ca50146 100755
--- a/bin/refresh-fb-cache.php
+++ b/bin/refresh-fb-cache.php
@@ -25,7 +25,7 @@ foreach ($cacheFiles as $file) {
}
}
-$filesToFetch = ['Piwoteka', 'fermentlodz', '103731068463865', $fileToFetch];
+$filesToFetch = ['Piwoteka', 'fermentlodz', $fileToFetch];
foreach ($filesToFetch as $feed) {
sleep(rand(60, 90));
diff --git a/config/facebook.com_cookies.txt b/config/facebook.com_cookies.txt
index 26d3c0e..42160a8 100644
--- a/config/facebook.com_cookies.txt
+++ b/config/facebook.com_cookies.txt
Binary files differ
diff --git a/providers/Facebook.php b/providers/Facebook.php
index 5eb588e..20f5028 100644
--- a/providers/Facebook.php
+++ b/providers/Facebook.php
@@ -22,7 +22,10 @@ class Facebook extends \Providers\Provider {
protected function _fetchItems() {
$jsonContent = [];
- exec('python3 ' . implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'get-fb-content.py']) . ' ' . escapeshellarg($this->_feed), $jsonContent);
+ exec('direnv exec ' .
+ implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'fb-scrape']) . ' ' .
+ 'python ' . implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'fb-scrape', 'get-fb-content.py']) . ' ' .
+ escapeshellarg($this->_feed), $jsonContent);
return json_decode(implode(PHP_EOL, $jsonContent), TRUE);
}
@@ -33,12 +36,14 @@ class Facebook extends \Providers\Provider {
if (!count($texts)) {
$texts[] = '';
}
- $texts = array_merge(
- $texts,
- array_map(function($i) {
- return sprintf('<img src="%s" />', $i);
- }, $obj['images'])
- );
+ if ($obj['images']) {
+ $texts = array_merge(
+ $texts,
+ array_map(function($i) {
+ return sprintf('<img src="%s" />', $i);
+ }, $obj['images'])
+ );
+ }
$item = new Item();
$item->ID = $obj['id'];
$item->Link = sprintf(