From 9601f3be1ee5be711671a922c3282339aeb1ddf4 Mon Sep 17 00:00:00 2001
From: emkael
Date: Mon, 31 Jan 2022 21:39:23 +0100
Subject: Another round of FB crap

---
 bin/fb-scrape/.gitignore        |  2 ++
 bin/fb-scrape/get-fb-content.py | 42 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 bin/fb-scrape/.gitignore
 create mode 100644 bin/fb-scrape/get-fb-content.py

(limited to 'bin/fb-scrape')

diff --git a/bin/fb-scrape/.gitignore b/bin/fb-scrape/.gitignore
new file mode 100644
index 0000000..772944e
--- /dev/null
+++ b/bin/fb-scrape/.gitignore
@@ -0,0 +1,2 @@
+.envrc
+.direnv
diff --git a/bin/fb-scrape/get-fb-content.py b/bin/fb-scrape/get-fb-content.py
new file mode 100644
index 0000000..5b22f70
--- /dev/null
+++ b/bin/fb-scrape/get-fb-content.py
@@ -0,0 +1,42 @@
+import json
+import logging
+import sys
+import time
+import warnings
+from os import path
+from pytz_deprecation_shim import PytzUsageWarning
+from random import randint
+
+from facebook_scraper import get_posts, enable_logging
+from requests.exceptions import RequestException
+
+debug = len(sys.argv) > 2 and sys.argv[2] == 'debug'
+
+if debug:
+    enable_logging(logging.DEBUG)
+
+warnings.filterwarnings(
+    action='ignore',
+    message=r'A low page limit'
+)
+warnings.filterwarnings(
+    action='ignore',
+    category=PytzUsageWarning
+)
+
+BASEDIR = path.dirname(__file__)
+
+posts = []
+try:
+    for post in get_posts(sys.argv[1], pages=2, cookies=path.join(BASEDIR, '../../config/facebook.com_cookies.txt')):
+        posts.append({
+            'id': post['post_id'],
+            'time': str(post['time']),
+            'texts': [t.strip() for t in post['text'].split('\n') if t] if post['text'] else [],
+            'images': post['images']
+        })
+        time.sleep(randint(10, 15))
+except RequestException:
+    pass
+
+print(json.dumps(posts))
--
cgit v1.2.3
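
For context beyond the patch itself: below is a minimal, hypothetical sketch of how the script added above might be driven and its output consumed. It relies only on what the diff shows (sys.argv[1] is the page or profile identifier, an optional second argument of 'debug' enables scraper logging, and the post list is printed to stdout as a JSON array); the page name 'SomePage' and the wrapper script itself are illustrative assumptions, not part of the commit.

import json
import subprocess

# Invoke the scraper for one page. 'SomePage' is a placeholder, not a
# value from the patch; append 'debug' as a second argument to turn on
# facebook_scraper's debug logging, as get-fb-content.py checks argv[2].
result = subprocess.run(
    ['python', 'get-fb-content.py', 'SomePage'],
    capture_output=True, text=True, check=True,
)

# The script prints a JSON array of objects with the keys built in the
# diff above: 'id', 'time', 'texts' and 'images'. On a request failure
# it swallows the exception and may emit a shorter (possibly empty) list.
for post in json.loads(result.stdout):
    print(post['id'], post['time'], len(post['images']), 'image(s)')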