summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2019-01-31 16:26:45 +0100
committer emkael <emkael@tlen.pl> 2019-01-31 19:41:51 +0100
commit 071ec7fc3a9391d98a0fdfda741d5cf064f4ed48 (patch)
tree 754d1a1399f04ca452cff54495ef68dade28a0cb
parent 9579d743a123a65913cc39ae1ff7ddf600923534 (diff)
Facebook providers based on the mobile site, fuck you
-rw-r--r-- providers/Facebook.php 107
1 files changed, 107 insertions, 0 deletions
diff --git a/providers/Facebook.php b/providers/Facebook.php
new file mode 100644
index 0000000..6514cfe
--- /dev/null
+++ b/providers/Facebook.php
@@ -0,0 +1,107 @@
+<?php
+
+namespace Providers;
+
+require_once('Provider.php');
+require_once('Item.php');
+require_once('../lib/querypath/src/qp.php');
+
+/**
+ * Feed provider that scrapes the posts of a Facebook page from the mobile
+ * site (m.facebook.com) and maps them to Item objects.
+ *
+ * Posts are recognised by the JSON stored in the `data-ft` attribute of
+ * `div` elements; only entries carrying an `mf_story_key` are treated as
+ * posts.
+ */
+class Facebook extends \Providers\Provider {
+
+    protected $_cacheTimeout = '5 minutes';
+
+    /** @var string|null Encoding detected for the most recently fetched page. */
+    private $_encoding;
+
+    /**
+     * @param mixed $feed    Page identifier; interpolated into the m.facebook.com URL.
+     * @param array $options Passed through to the parent provider unchanged.
+     */
+    public function __construct($feed, $options=[]) {
+        parent::__construct($feed, $options);
+    }
+
+    /**
+     * @return string Cache path pattern containing a single `%s` placeholder.
+     */
+    protected function _getCachePath() {
+        return '../cache/facebook.%s';
+    }
+
+    /**
+     * Downloads the mobile-site HTML for the configured feed and records its
+     * detected encoding in $this->_encoding.
+     *
+     * @return string The response body, or an empty string when the transfer failed.
+     */
+    private function __getWWWContent() {
+        $header = [
+            'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
+            'Cache-Control: max-age=0',
+            'Connection: keep-alive',
+            'Keep-Alive: 300',
+            'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+            'Pragma: ',
+        ];
+        $ch = curl_init(sprintf('https://m.facebook.com/%s/', $this->_feed));
+        if ($ch === false) {
+            $this->_encoding = 'UTF-8';
+            return '';
+        }
+        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)');
+        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
+        curl_setopt($ch, CURLOPT_AUTOREFERER, true);
+        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
+        // An empty string lets cURL advertise every encoding it supports.
+        curl_setopt($ch, CURLOPT_ENCODING, '');
+        curl_setopt($ch, CURLOPT_TIMEOUT, 20);
+        $page = curl_exec($ch);
+        if ($page === false) {
+            // curl_exec() returns false on failure; report it instead of
+            // passing a boolean on to the encoding detection and the parser.
+            error_log(sprintf('Facebook provider: fetching "%s" failed: %s', $this->_feed, curl_error($ch)));
+            $page = '';
+        }
+        curl_close($ch);
+        // mb_detect_encoding() may return false; fall back to UTF-8 so that a
+        // usable encoding name is always handed to the parser.
+        $encoding = mb_detect_encoding($page);
+        $this->_encoding = ($encoding !== false) ? $encoding : 'UTF-8';
+        return $page;
+    }
+
+    /**
+     * Fetches the page and extracts one entry per post.
+     *
+     * Entries are keyed by `mf_story_key` while collecting, so when several
+     * elements carry the same key the last one wins.
+     *
+     * @return array List of arrays with the keys `id`, `content`, `texts`
+     *               and `time` (`time` is null when the page markup does
+     *               not carry a publish time).
+     */
+    protected function _fetchItems() {
+        $page = $this->__getWWWContent();
+        if ($page === '') {
+            // Nothing was downloaded, so there is nothing to parse.
+            return [];
+        }
+        $tree = htmlqp($page, null, ['convert_from_encoding' => $this->_encoding, 'convert_to_encoding' => $this->_encoding]);
+        $items = [];
+        foreach ($tree->find('div[data-ft]') as $div) {
+            $data = json_decode((string) $div->attr('data-ft'), true);
+            if (!is_array($data) || !isset($data['mf_story_key'])) {
+                continue;
+            }
+            $key = $data['mf_story_key'];
+            // The publish time lives under page_insights[<page_id>]; guard
+            // every level because the markup is not under our control.
+            $time = null;
+            if (isset($data['page_id']) && is_scalar($data['page_id'])
+                && isset($data['page_insights'][$data['page_id']]['post_context']['publish_time'])) {
+                $time = $data['page_insights'][$data['page_id']]['post_context']['publish_time'];
+            }
+            $texts = [];
+            foreach ($div->find('h3') as $h3) {
+                // Keep a heading unless its only link points at the feed's own page.
+                if (!$h3->find(sprintf('a[href^="/%s/"]', $this->_feed))->count() || $h3->find('a')->count() > 1) {
+                    $texts[] = trim($h3->text());
+                }
+            }
+            foreach ($div->find('div>span p') as $p) {
+                $texts[] = trim(strip_tags($p->html()));
+            }
+            $items[$key] = [
+                'id' => $key,
+                'content' => $div->html(),
+                'texts' => $texts,
+                'time' => $time
+            ];
+        }
+        return array_values($items);
+    }
+
+    /**
+     * No filtering is applied for this provider.
+     *
+     * @param array $items
+     * @return array The items, unchanged.
+     */
+    protected function _spamFilter($items) {
+        return $items;
+    }
+
+    /**
+     * Converts the arrays produced by _fetchItems() into Item objects.
+     *
+     * @param array $content Entries with the keys `id`, `texts` and `time`.
+     * @return array List of Item objects, in the same order as $content.
+     */
+    protected function _mapItems($content) {
+        return array_map(
+            function ($obj) {
+                $item = new Item();
+                $item->ID = $obj['id'];
+                $item->Link = sprintf(
+                    'https://facebook.com/%s',
+                    $obj['id']
+                );
+                // A post may yield no text at all (e.g. an image-only post),
+                // so do not assume that a first text exists.
+                $item->Title = isset($obj['texts'][0]) ? $obj['texts'][0] : '';
+                $item->Text = implode('<br />', $obj['texts']);
+                $item->Time = $obj['time'];
+                return $item;
+            },
+            $content
+        );
+    }
+
+    /**
+     * Items are kept in the order in which they were extracted.
+     *
+     * @param array $content
+     * @return array The content, unchanged.
+     */
+    protected function _sortContent($content) {
+        return $content;
+    }
+
+    /**
+     * @return string Human-readable title of the feed.
+     */
+    public function title() {
+        return sprintf("%s's Facebook page posts", $this->_feed);
+    }
+
+}
+
+?>