From 9601f3be1ee5be711671a922c3282339aeb1ddf4 Mon Sep 17 00:00:00 2001 From: emkael Date: Mon, 31 Jan 2022 21:39:23 +0100 Subject: Another round of FB crap --- _cron/fb-cache-files | 3 ++- bin/fb-scrape/.gitignore | 2 ++ bin/fb-scrape/get-fb-content.py | 42 ++++++++++++++++++++++++++++++++++++++++ bin/get-fb-content.py | 20 ------------------- bin/refresh-fb-cache.php | 2 +- config/facebook.com_cookies.txt | Bin 1154 -> 875 bytes providers/Facebook.php | 19 +++++++++++------- 7 files changed, 59 insertions(+), 29 deletions(-) create mode 100644 bin/fb-scrape/.gitignore create mode 100644 bin/fb-scrape/get-fb-content.py delete mode 100644 bin/get-fb-content.py diff --git a/_cron/fb-cache-files b/_cron/fb-cache-files index c6b2cb1..4e116d3 100644 --- a/_cron/fb-cache-files +++ b/_cron/fb-cache-files @@ -1,2 +1,3 @@ -*/10 * * * * $SITEPATH/bin/refresh-fb-cache.php +DIRENV_LOG_FORMAT="" +14 * * * * $SITEPATH/bin/refresh-fb-cache.php 10 * * * * find $SITEPATH/cache -size 6c diff --git a/bin/fb-scrape/.gitignore b/bin/fb-scrape/.gitignore new file mode 100644 index 0000000..772944e --- /dev/null +++ b/bin/fb-scrape/.gitignore @@ -0,0 +1,2 @@ +.envrc +.direnv diff --git a/bin/fb-scrape/get-fb-content.py b/bin/fb-scrape/get-fb-content.py new file mode 100644 index 0000000..5b22f70 --- /dev/null +++ b/bin/fb-scrape/get-fb-content.py @@ -0,0 +1,42 @@ +import json +import logging +import sys +import time +import warnings +from os import path +from pytz_deprecation_shim import PytzUsageWarning +from random import randint + +from facebook_scraper import get_posts, enable_logging +from requests.exceptions import RequestException + +debug = len(sys.argv) > 2 and sys.argv[2] == 'debug' + +if debug: + enable_logging(logging.DEBUG) + +warnings.filterwarnings( + action='ignore', + message=r'A low page limit' +) +warnings.filterwarnings( + action='ignore', + category=PytzUsageWarning +) + +BASEDIR = path.dirname(__file__) + +posts = [] +try: + for post in get_posts(sys.argv[1], pages=2, cookies=path.join(BASEDIR, '../../config/facebook.com_cookies.txt')): + posts.append({ + 'id': post['post_id'], + 'time': str(post['time']), + 'texts': [t.strip() for t in post['text'].split('\n') if t] if post['text'] else [], + 'images': post['images'] + }) + time.sleep(randint(10, 15)) +except RequestException: + pass + +print(json.dumps(posts)) diff --git a/bin/get-fb-content.py b/bin/get-fb-content.py deleted file mode 100644 index 180c7b4..0000000 --- a/bin/get-fb-content.py +++ /dev/null @@ -1,20 +0,0 @@ -import json -import logging -import sys -from os import path - -from facebook_scraper import get_posts - - -BASEDIR = path.dirname(__file__) - -posts = [] -for post in get_posts(sys.argv[1], cookies=path.join(BASEDIR, '../config/facebook.com_cookies.txt'), pages=3): - posts.append({ - 'id': post['post_id'], - 'time': str(post['time']), - 'texts': [t.strip() for t in post['text'].split('\n') if t], - 'images': post['images'] - }) - -print(json.dumps(posts)) diff --git a/bin/refresh-fb-cache.php b/bin/refresh-fb-cache.php index 4b702ef..ca50146 100755 --- a/bin/refresh-fb-cache.php +++ b/bin/refresh-fb-cache.php @@ -25,7 +25,7 @@ foreach ($cacheFiles as $file) { } } -$filesToFetch = ['Piwoteka', 'fermentlodz', '103731068463865', $fileToFetch]; +$filesToFetch = ['Piwoteka', 'fermentlodz', $fileToFetch]; foreach ($filesToFetch as $feed) { sleep(rand(60, 90)); diff --git a/config/facebook.com_cookies.txt b/config/facebook.com_cookies.txt index 26d3c0e..42160a8 100644 Binary files a/config/facebook.com_cookies.txt and b/config/facebook.com_cookies.txt differ diff --git a/providers/Facebook.php b/providers/Facebook.php index 5eb588e..20f5028 100644 --- a/providers/Facebook.php +++ b/providers/Facebook.php @@ -22,7 +22,10 @@ class Facebook extends \Providers\Provider { protected function _fetchItems() { $jsonContent = []; - exec('python3 ' . implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'get-fb-content.py']) . ' ' . escapeshellarg($this->_feed), $jsonContent); + exec('direnv exec ' . + implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'fb-scrape']) . ' ' . + 'python ' . implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'fb-scrape', 'get-fb-content.py']) . ' ' . + escapeshellarg($this->_feed), $jsonContent); return json_decode(implode(PHP_EOL, $jsonContent), TRUE); } @@ -33,12 +36,14 @@ class Facebook extends \Providers\Provider { if (!count($texts)) { $texts[] = ''; } - $texts = array_merge( - $texts, - array_map(function($i) { - return sprintf('', $i); - }, $obj['images']) - ); + if ($obj['images']) { + $texts = array_merge( + $texts, + array_map(function($i) { + return sprintf('', $i); + }, $obj['images']) + ); + } $item = new Item(); $item->ID = $obj['id']; $item->Link = sprintf( -- cgit v1.2.3