diff options
author | emkael <emkael@tlen.pl> | 2019-01-31 16:26:45 +0100 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2019-01-31 19:41:51 +0100 |
commit | 071ec7fc3a9391d98a0fdfda741d5cf064f4ed48 (patch) | |
tree | 754d1a1399f04ca452cff54495ef68dade28a0cb /providers | |
parent | 9579d743a123a65913cc39ae1ff7ddf600923534 (diff) |
Facebook providers based on the mobile site, fuck you
Diffstat (limited to 'providers')
-rw-r--r-- | providers/Facebook.php | 107 |
1 files changed, 107 insertions, 0 deletions
<?php

namespace Providers;

require_once('Provider.php');
require_once('Item.php');
require_once('../lib/querypath/src/qp.php');

/**
 * Feed provider that scrapes the posts of a Facebook page from the mobile
 * site (m.facebook.com) and maps every story found there to an Item.
 */
class Facebook extends \Providers\Provider {

    /** How long fetched content stays cached. */
    protected $_cacheTimeout = '5 minutes';

    /** Encoding reported by mb_detect_encoding() for the last fetched page. */
    private $_encoding;

    /**
     * @param string $feed    Facebook page name; it is inserted into the
     *                        m.facebook.com URL and into the link selector.
     * @param array  $options Passed through to the parent provider untouched.
     */
    public function __construct($feed, $options=[]) {
        parent::__construct($feed, $options);
    }

    /**
     * @return string Cache path pattern containing a single %s placeholder.
     */
    protected function _getCachePath() {
        return '../cache/facebook.%s';
    }

    /**
     * Downloads the mobile page of the feed, posing as a desktop browser.
     *
     * Also records the detected encoding of the page in $this->_encoding.
     *
     * @return string The page body, or an empty string when the request
     *                could not be initialised, failed, or returned nothing.
     */
    private function __getWWWContent() {
        $header = [
            'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
            'Cache-Control: max-age=0',
            'Connection: keep-alive',
            'Keep-Alive: 300',
            'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Pragma: ',
        ];
        $ch = curl_init(sprintf('https://m.facebook.com/%s/', $this->_feed));
        if ($ch === FALSE) {
            return '';
        }
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)');
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        // An empty string lets cURL advertise every encoding it supports.
        curl_setopt($ch, CURLOPT_ENCODING, '');
        curl_setopt($ch, CURLOPT_TIMEOUT, 20);
        $page = curl_exec($ch);
        curl_close($ch);
        // curl_exec() returns FALSE on failure; never hand that to the parser.
        if (!is_string($page) || $page === '') {
            return '';
        }
        $this->_encoding = mb_detect_encoding($page);
        return $page;
    }

    /**
     * Extracts the stories from the downloaded page.
     *
     * Every div carrying a JSON "data-ft" attribute with an "mf_story_key"
     * is treated as one story; a later div with the same key overwrites an
     * earlier one.
     *
     * @return array List of arrays with the keys "id", "content", "texts"
     *               and "time" ("time" is NULL when the story carries no
     *               publish time). Empty when the page could not be fetched.
     */
    protected function _fetchItems() {
        $page = $this->__getWWWContent();
        if ($page === '') {
            return [];
        }
        // Source and target encoding are identical on purpose: the markup
        // should reach the parser in the encoding it was served in.
        $tree = htmlqp($page, NULL, ['convert_from_encoding' => $this->_encoding, 'convert_to_encoding' => $this->_encoding]);
        $ownLinkSelector = sprintf('a[href^="/%s/"]', $this->_feed);
        $items = [];
        foreach ($tree->find('div[data-ft]') as $div) {
            $data = json_decode($div->attr()['data-ft'], TRUE);
            if (!is_array($data) || !isset($data['mf_story_key'])) {
                continue;
            }
            $key = $data['mf_story_key'];
            $texts = [];
            foreach ($div->find('h3') as $h3) {
                // Keep a heading unless its only link points at the feed's
                // own path.
                if (!$h3->find($ownLinkSelector)->count() || $h3->find('a')->count() > 1) {
                    $texts[] = trim($h3->text());
                }
            }
            foreach ($div->find('div>span p') as $p) {
                $texts[] = trim(strip_tags($p->html()));
            }
            $items[$key] = [
                'id' => $key,
                'content' => $div->html(),
                'texts' => $texts,
                'time' => $this->_publishTime($data)
            ];
        }
        return array_values($items);
    }

    /**
     * Reads the publish time of a story from its decoded "data-ft" payload.
     *
     * @param array $data Decoded "data-ft" attribute of a story div.
     * @return mixed The value stored under
     *               page_insights -> <page_id> -> post_context -> publish_time,
     *               or NULL when any part of that path is missing.
     */
    private function _publishTime($data) {
        if (!isset($data['page_id']) || !is_scalar($data['page_id'])) {
            return NULL;
        }
        $pageId = $data['page_id'];
        if (!isset($data['page_insights'][$pageId]['post_context']['publish_time'])) {
            return NULL;
        }
        return $data['page_insights'][$pageId]['post_context']['publish_time'];
    }

    /**
     * No spam filtering is applied to Facebook items.
     *
     * @param array $items
     * @return array The items, unchanged.
     */
    protected function _spamFilter($items) {
        return $items;
    }

    /**
     * Converts the arrays produced by _fetchItems() into Item objects.
     *
     * @param array $content List of arrays with "id", "texts" and "time".
     * @return array List of Item objects in the same order.
     */
    protected function _mapItems($content) {
        return array_map(
            function ($obj) {
                $item = new Item();
                $item->ID = $obj['id'];
                $item->Link = sprintf(
                    'https://facebook.com/%s',
                    $obj['id']
                );
                // A story may have yielded no text at all (e.g. media only).
                $item->Title = isset($obj['texts'][0]) ? $obj['texts'][0] : '';
                $item->Text = implode('<br />', $obj['texts']);
                $item->Time = $obj['time'];
                return $item;
            },
            $content
        );
    }

    /**
     * Items are kept in the order in which they were found on the page.
     *
     * @param array $content
     * @return array The content, unchanged.
     */
    protected function _sortContent($content) {
        return $content;
    }

    /**
     * @return string Human-readable title of the feed.
     */
    public function title() {
        return sprintf("%s's Facebook page posts", $this->_feed);
    }

}