diff options
Diffstat (limited to 'providers')
-rw-r--r-- | providers/Facebook.php | 18 | ||||
-rw-r--r-- | providers/Pagediff.php | 110 | ||||
-rw-r--r-- | providers/Provider.php | 9 | ||||
-rw-r--r-- | providers/Rss.php | 4 | ||||
-rw-r--r-- | providers/Twitter.php | 2 | ||||
-rw-r--r-- | providers/XmlFeed.php | 2 | ||||
-rw-r--r-- | providers/Youtube.php | 19 |
7 files changed, 159 insertions, 5 deletions
diff --git a/providers/Facebook.php b/providers/Facebook.php index 20f5028..384c013 100644 --- a/providers/Facebook.php +++ b/providers/Facebook.php @@ -26,7 +26,23 @@ class Facebook extends \Providers\Provider { implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'fb-scrape']) . ' ' . 'python ' . implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', 'bin', 'fb-scrape', 'get-fb-content.py']) . ' ' . escapeshellarg($this->_feed), $jsonContent); - return json_decode(implode(PHP_EOL, $jsonContent), TRUE); + $cacheFile = sprintf($this->_getCachePath(), $this->_feed); + if (file_exists($cacheFile)) { + $cache = unserialize($this->_getCache($cacheFile)); + } + else { + $cache = []; + } + $fetched = json_decode(implode(PHP_EOL, $jsonContent), TRUE); + $cacheIDs = array_map(function($obj) { + return $obj['id']; + }, $cache); + foreach ($fetched as $fetchedItem) { + if (!in_array($fetchedItem['id'], $cacheIDs)) { + $cache[] = $fetchedItem; + } + } + return $cache; } protected function _mapItems($content) { diff --git a/providers/Pagediff.php b/providers/Pagediff.php new file mode 100644 index 0000000..8679532 --- /dev/null +++ b/providers/Pagediff.php @@ -0,0 +1,110 @@ +<?php + +namespace Providers; + +require_once('HtmlFeed.php'); +require_once('Item.php'); + +class Pagediff extends \Providers\HtmlFeed { + + protected $_cacheTimeout = '1 hour'; + + public function __construct($feed, $options=[]) { + $config = json_decode(file_get_contents('../config/pagediff.json'), TRUE); + if (!isset($config[$feed])) { + throw new \Exception(sprintf('Feed %s not configured', $feed)); + } + $this->_config = $config[$feed]; + parent::__construct($feed, $options); + } + + protected function _getCachePath() { + return '../cache/pagediff.%s'; + } + + protected function _getFeedUrl($feed) { + return $this->_config['url']; + } + + private function _getItemCachePath() { + return sprintf('../cache/pagediff.items.%s', $this->_feed); + } + + private function _getCachedContent() { + if (!file_exists($this->_getItemCachePath())) { + return []; + } + return unserialize( + file_get_contents( + $this->_getItemCachePath() + ) + ); + } + + private function _saveCachedContent($content) { + return file_put_contents( + $this->_getItemCachePath(), + serialize($content) + ); + } + + private function _getContentFromSelector($tree, $selector) { + $node = $tree->find($selector['node']); + if ($node->count() == 0) { + return NULL; + } + if ($node->count() != 1) { + if (isset($selector['index'])) { + $node = $node->eq($selector['index']); + } else { + $node = $node->first(); + } + } + if (isset($selector['html'])) { + return $node->innerHTML(); + } + if (isset($selector['attr'])) { + $text = $node->attr()[$selector['attr']]; + } else { + $text = $node->text(); + } + if (isset($selector['transform'])) { + $text = sprintf($selector['transform'], $text); + } + return $text; + } + + protected function _parseFeedContent($tree) { + $selectors = $this->_config['selectors']; + $items = $this->_getCachedContent(); + $currentItem = []; + foreach (['id', 'link', 'name', 'text'] as $type) { + $currentItem[$type] = $this->_getContentFromSelector($tree, $selectors[$type]); + } + $currentItem['time'] = date('Y-m-d H:i:s'); + if (!count($items) || $currentItem['id'] != $items[0]['id']) { + $items = array_merge([$currentItem], $items); + $this->_saveCachedContent($items); + } + return $items; + } + + protected function _mapItems($items) { + return array_map(function($item) { + $i = new Item(); + $i->ID = $item['id']; + $i->Title = $item['name']; + $i->Time = $item['time']; + $i->Text = $item['text']; + $i->Link = $item['link']; + return $i; + }, $items); + } + + public function title() { + return $this->_config['title']; + } + +} + +?> diff --git a/providers/Provider.php b/providers/Provider.php index 434b1d1..fe2195c 100644 --- a/providers/Provider.php +++ b/providers/Provider.php @@ -35,6 +35,9 @@ abstract class Provider { return unserialize($this->_getCache($cacheFile)); } else { $content = $this->_fetchItems(); + if (empty($content) && file_exists($cacheFile)) { + return unserialize($this->_getCache($cacheFile)); + } file_put_contents($cacheFile, serialize($content)); $this->_cacheTime = time(); return $content; @@ -51,6 +54,12 @@ abstract class Provider { } } } + if (array_key_exists('title', $this->_options)) { + $keyword = strtolower($this->_options['title']); + $items = array_filter($items, function($item) use($keyword) { + return str_contains(strtolower($item->Title), $keyword); + }); + } return $items; } diff --git a/providers/Rss.php b/providers/Rss.php index 8e20e3f..61e5321 100644 --- a/providers/Rss.php +++ b/providers/Rss.php @@ -31,13 +31,13 @@ class Rss extends \Providers\XmlFeed { protected function _mapItems($content) { $items = []; foreach ($content as $contentString) { - $itemString = str_replace(['content:encoded>', '<dc:', '</dc:', '<media:', '</media:', '<wfw:', '</wfw:'], ['content>', '<', '</', '<', '</', '<', '</'], $contentString); + $itemString = str_replace(['content:encoded>', '<yt:', '</yt:', '<dc:', '</dc:', '<media:', '</media:', '<wfw:', '</wfw:'], ['content>', '<', '</', '<', '</', '<', '</', '<', '</'], $contentString); $item = new \SimpleXMLElement($itemString); $itemObject = new Item(); $itemObject->ID = strval($item->id ?: $item->guid) ?: ltrim(parse_url(strval($item->link))['path'], '/'); $itemObject->Title = strval($item->title); $itemObject->Time = strval($item->published ?: $item->pubDate ?: $item->updated); - $itemObject->Text = strval($item->summary ?: $item->description ?: $item->content) ?: ($item->description ?: $item->content)->children()->asXML(); + $itemObject->Text = strval($item->summary ?: $item->description ?: $item->content ?: $item->group->description); $itemObject->Link = strval(isset($item->link['href']) ? $item->link->attributes()['href'] : $item->link); $itemObject->Author = strval($item->creator ? $item->creator : (is_string($item->author) ? $item->author : $item->author->name)); $items[] = $itemObject; diff --git a/providers/Twitter.php b/providers/Twitter.php index 9a98364..11532b5 100644 --- a/providers/Twitter.php +++ b/providers/Twitter.php @@ -45,7 +45,7 @@ class Twitter extends \Providers\Provider { }, $content->errors )); } - throw new Exception($errorString); + throw new \Exception($errorString); } unset($content->httpstatus); diff --git a/providers/XmlFeed.php b/providers/XmlFeed.php index d5ecd1c..0ac5e6f 100644 --- a/providers/XmlFeed.php +++ b/providers/XmlFeed.php @@ -15,7 +15,7 @@ abstract class XmlFeed extends \Providers\Provider { parent::__construct($feed, $options); $this->_feedUrl = $this->_getFeedUrl($feed); if (!$this->_feedUrl) { - throw new Exception('XML feed "' . $feed . '" undefined'); + throw new \Exception('XML feed "' . $feed . '" undefined'); } } diff --git a/providers/Youtube.php b/providers/Youtube.php new file mode 100644 index 0000000..bd38a52 --- /dev/null +++ b/providers/Youtube.php @@ -0,0 +1,19 @@ +<?php + +namespace Providers; + +require_once('Rss.php'); + +class Youtube extends Rss { + + protected function _getFeedUrl($feed) { + return 'https://www.youtube.com/feeds/videos.xml?channel_id=' . $feed; + } + + protected function _getCachePath() { + return '../cache/youtube.%s'; + } + +} + +?> |