summaryrefslogtreecommitdiff
path: root/providers/Facebook.php
blob: d5233c356015e200678b59a11bdac32313e102ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
<?php

namespace Providers;

require_once('HtmlFeed.php');
require_once('Item.php');

/**
 * Feed provider that scrapes a Facebook page's posts from the mobile site
 * (m.facebook.com) and maps them to Item objects.
 *
 * Options consulted via $this->_options:
 *  - 'dump':  print the fetched HTML and the parsed data, then terminate the
 *             script; also switches 'force' on.
 *  - 'force': shorten the cache timeout to one second.
 */
class Facebook extends \Providers\HtmlFeed {

    /**
     * Cache lifetime as a relative-time string.
     * NOTE(review): presumably interpreted by the parent class - confirm there.
     */
    protected $_cacheTimeout = '300 years';

    /**
     * @param mixed $feed    Feed identifier, forwarded to the parent constructor.
     * @param mixed $options Provider options, forwarded to the parent constructor.
     */
    public function __construct($feed, $options) {
        parent::__construct($feed, $options);
        // Dumping is only useful on freshly fetched content, so it implies 'force'.
        if (isset($this->_options['dump'])) {
            $this->_options['force'] = true;
        }
        if (isset($this->_options['force'])) {
            $this->_cacheTimeout = '1 second';
        }
    }

    /**
     * Returns a desktop Chrome (Windows) User-Agent string.
     *
     * NOTE(review): the double-underscore prefix differs from the single
     * underscore used by the other hooks in this class - confirm that the
     * parent class really calls this name.
     *
     * @return string
     */
    protected function __getUserAgent() {
        return 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.51 Safari/537.36';
    }

    /**
     * Returns the cache file path pattern; it contains one %s placeholder.
     *
     * @return string
     */
    protected function _getCachePath() {
        return '../cache/facebook.%s';
    }

    /**
     * Builds the mobile-site URL of the posts listing for a page.
     *
     * @param string $feed Page name inserted into the URL path.
     * @return string
     */
    protected function _getFeedUrl($feed) {
        return sprintf('https://m.facebook.com/%s/posts', $feed);
    }

    /**
     * Reads the date text of the first <abbr> inside a post block and rewrites
     * its Polish wording into an English phrase that strtotime() understands.
     *
     * The rewritten string itself is returned, not the resolved timestamp.
     * NOTE(review): relative phrases such as "5 hours ago" are therefore
     * resolved by whoever consumes the value later - confirm this is intended.
     *
     * @param mixed $block Node exposing find()/eq()/text() for the post.
     * @return string English date phrase accepted by strtotime().
     * @throws \Exception When the rewritten string cannot be parsed as a date.
     */
    private function _extractTimestamp($block) {
        $origString = (string)$block->find('abbr')->eq(0)->text();
        // Replacements run one after another; the order matters (for example
        // ' o ' must become ', ' before 'Dzisiaj' is removed).
        $string = str_replace("\xc2\xa0", ' ', $origString); // UTF-8 no-break space
        $string = str_replace(' godzinie ', ' ', $string);
        $string = str_replace(' o ', ', ', $string);
        $string = str_replace('Wczoraj', 'Yesterday', $string);
        $string = str_replace('Dzisiaj', '', $string);
        $string = str_replace('godz.', 'hours ago', $string);
        $string = str_replace('min', 'minutes ago', $string);
        $string = str_replace('Przed chwilą', 'now', $string);
        $string = str_replace(['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca', 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'], ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'], $string);
        // Compare against false explicitly: a timestamp of 0 is a valid result.
        if (strtotime($string) === false) {
            // The hex dump exposes invisible characters that break parsing; it
            // goes into the message instead of being printed to the output.
            throw new \Exception('Cannot parse date string: ' . $origString . ' (hex: ' . bin2hex($origString) . ')');
        }
        return $string;
    }

    /**
     * Extracts posts from the parsed page.
     *
     * Every `div[data-ft]` under `#timelineBody` whose JSON `data-ft`
     * attribute carries an `mf_story_key` is treated as a post. Posts are
     * keyed by that value while collecting, so a repeated key overwrites the
     * earlier entry. Posts without any paragraph/heading text are skipped.
     *
     * With the 'dump' option set, diagnostic output is printed and the script
     * terminates instead of returning.
     *
     * @param mixed $tree Parsed document exposing find()/html().
     * @return array List of arrays with keys 'id', 'time', 'content', 'texts'.
     * @throws \Exception When a post's date cannot be parsed.
     */
    protected function _parseFeedContent($tree) {
        $items = [];
        if (isset($this->_options['dump'])) {
            print($tree->html());
        }
        foreach ($tree->find('#timelineBody div[data-ft]') as $header) {
            $data = json_decode($header->attr()['data-ft'], true);
            if (isset($this->_options['dump'])) {
                var_dump($data);
            }
            // Undecodable JSON yields null, which fails this isset() as well.
            if (isset($data['mf_story_key'])) {
                $key = $data['mf_story_key'];
                $texts = [];
                foreach ($header->find('p, h3') as $paragraph) {
                    $text = $paragraph->text();
                    // Drop the Polish "More" expander link text.
                    if ((string)$text !== 'Więcej') {
                        $texts[] = $text;
                    }
                }
                if (isset($this->_options['dump'])) {
                    print_r($data);
                    print($key);
                    print(PHP_EOL);
                    print_r($texts);
                    print(PHP_EOL);
                }
                if (count($texts)) {
                    $items[$key] = [
                        'id' => $key,
                        'time' => $this->_extractTimestamp($header),
                        'content' => $header->html(),
                        'texts' => $texts
                    ];
                }
            }
        }
        if (isset($this->_options['dump'])) {
            die();
        }
        return array_values($items);
    }

    /**
     * Converts the arrays produced by _parseFeedContent() into Item objects.
     *
     * The first text fragment becomes the title; all fragments joined with
     * `<br />` become the body.
     *
     * @param array $content List of arrays with keys 'id', 'time', 'texts'.
     * @return array List of Item objects.
     */
    protected function _mapItems($content) {
        return array_map(
            function ($obj) {
                $item = new Item();
                $item->ID = $obj['id'];
                $item->Link = sprintf(
                    'https://facebook.com/%s',
                    $obj['id']
                );
                // 'texts' is never empty here: _parseFeedContent() skips such posts.
                $item->Title = $obj['texts'][0];
                $item->Text = implode('<br />', $obj['texts']);
                $item->Time = $obj['time'];
                return $item;
            },
            $content
        );
    }

    /**
     * Returns the human-readable feed title built from $this->_feed.
     *
     * @return string
     */
    public function title() {
        return sprintf("%s's Facebook page posts", $this->_feed);
    }

}

?>