summaryrefslogtreecommitdiff
path: root/bin/fb-scrape/get-fb-content.py
blob: 6d6b7ba53efa946317edcb6366254a8cea3966f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import json
import logging
import sys
import time
import warnings
from os import path
from pytz_deprecation_shim import PytzUsageWarning
from random import randint

from facebook_scraper import get_posts, enable_logging
from facebook_scraper.exceptions import TemporarilyBanned
from requests.exceptions import RequestException

# Verbose scraper logging is opt-in: it is switched on only when the second
# command-line argument is exactly the word "debug".  Slicing keeps this safe
# when fewer arguments were supplied (the slice is then simply empty).
debug = sys.argv[2:3] == ['debug']

if debug:
    enable_logging(logging.DEBUG)

# Silence warnings that are expected during a normal run.  Each `message` is
# a regular expression that must match the start of the warning text.
# (Presumably both come from facebook_scraper -- confirm if the wording changes.)
for _ignored_message in (
    r'A low page limit',
    r"Facebook says 'Unsupported Browser'",
):
    warnings.filterwarnings(action='ignore', message=_ignored_message)

# Suppress every PytzUsageWarning, whatever its text.
warnings.filterwarnings(action='ignore', category=PytzUsageWarning)

BASEDIR = path.dirname(__file__)

# Cookie file handed to facebook_scraper, resolved relative to this script.
COOKIES_FILE = path.join(BASEDIR, '../../config/facebook.com_cookies.txt')


def split_text(text):
    """Return the non-blank lines of *text*, each stripped of whitespace.

    A falsy *text* (``None`` or ``''``) yields an empty list.  Lines that
    contain only whitespace are dropped as well, so the result never
    contains empty strings.
    """
    if not text:
        return []
    stripped_lines = (line.strip() for line in text.split('\n'))
    return [line for line in stripped_lines if line]


def to_record(post):
    """Reduce a scraped post mapping to the fields this script emits.

    Reads the ``post_id``, ``time``, ``text`` and ``images`` keys of *post*
    and returns a dict with the keys ``id``, ``time``, ``texts`` and
    ``images``.  ``time`` is stringified so that the record can be passed to
    ``json.dumps``.
    """
    return {
        'id': post['post_id'],
        'time': str(post['time']),
        'texts': split_text(post['text']),
        'images': post['images'],
    }


def collect_posts(account, pages=2, cookies=COOKIES_FILE):
    """Scrape posts for *account* and return them as a list of records.

    *account*, *pages* and *cookies* are forwarded to ``get_posts``.  The
    scrape is best-effort: when a ``RequestException`` or
    ``TemporarilyBanned`` interrupts it, the reason is written to stderr and
    the records gathered so far are returned instead of being discarded.
    """
    records = []
    try:
        for post in get_posts(account, pages=pages, cookies=cookies):
            records.append(to_record(post))
            # Random 10-15 second pause after every post (presumably to keep
            # the request rate low enough to avoid being blocked).
            time.sleep(randint(10, 15))
    except (RequestException, TemporarilyBanned) as err:
        # Keep stdout clean for the JSON payload; report the failure on
        # stderr so a truncated result is not mistaken for a complete one.
        print(
            'get-fb-content: scraping stopped early: {!r}'.format(err),
            file=sys.stderr,
        )
    return records


def main():
    """Scrape the account named on the command line and print JSON to stdout."""
    if len(sys.argv) < 2:
        sys.exit(
            'usage: {} ACCOUNT [debug]'.format(path.basename(sys.argv[0]))
        )
    print(json.dumps(collect_posts(sys.argv[1])))


if __name__ == '__main__':
    main()