diff options
author | emkael <emkael@tlen.pl> | 2017-01-18 20:07:16 +0100 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2017-01-18 20:07:16 +0100 |
commit | 9a9c04512e5dcb77c7fe5d850e3f2a0250cc160e (patch) | |
tree | fed46b5f4c2ed3a050bb1a7ad7c6d0a3ea844d55 /providers/Motorsport.php | |
parent | c5bcf8f74fb80b7e163663845b0d6e35cabface3 (diff) |
* Motor Sport Magazine feed provider
Diffstat (limited to 'providers/Motorsport.php')
-rw-r--r-- | providers/Motorsport.php | 116 |
1 files changed, 116 insertions, 0 deletions
<?php

namespace Providers;

require_once('Twitter.php');
require_once('Item.php');
require_once('../lib/querypath/src/qp.php');

/**
 * Motor Sport Magazine feed provider.
 *
 * Takes the items returned by the parent Twitter provider, collects the
 * links they carry, downloads every linked page (with an on-disk cache) and
 * reads the page's <meta property="..."> tags. That metadata is then
 * preferred over the tweet's own data when the feed items are built.
 */
class Motorsport extends \Providers\Twitter {

    // NOTE(review): not read anywhere in this class; presumably consumed by
    // the parent provider's caching logic - confirm against Twitter.php.
    protected $_cacheTimeout = '15 minutes';

    /**
     * Passes both arguments to the parent constructor unchanged.
     *
     * @param mixed $feed    feed identifier, forwarded as-is
     * @param array $options provider options, forwarded as-is
     */
    public function __construct($feed, $options=[]) {
        parent::__construct($feed, $options);
    }

    /**
     * Returns the sprintf() template for this provider's cache files.
     *
     * The single %s placeholder is filled with $this->_feed by
     * _getLinkContent().
     *
     * @return string
     */
    protected function _getCachePath() {
        return '../cache/motorsport.%s';
    }

    /**
     * Builds one raw item per distinct external link found in the tweets.
     *
     * Links are keyed by their expanded URL, so a URL shared by several
     * tweets yields a single item (the last tweet seen wins). Links that are
     * not absolute http(s) URLs, or that point at twitter.com itself, are
     * dropped before anything is downloaded.
     *
     * @return \stdClass[] objects with title, time, link, content (metadata
     *                     array) and, when the tweet has a user, author
     */
    protected function _fetchItems() {
        $tweets = parent::_fetchItems();
        if (empty($tweets)) {
            return [];
        }

        $links = [];
        foreach ($tweets as $tweet) {
            // Tweets without URL entities contribute nothing.
            if (!isset($tweet->entities->urls)) {
                continue;
            }
            foreach ($tweet->entities->urls as $url) {
                if (isset($url->expanded_url)) {
                    $links[$url->expanded_url] = $tweet;
                }
            }
        }

        $links = array_filter(
            $links,
            function ($link) {
                $parts = parse_url((string) $link);
                // Only absolute http(s) URLs may reach file_get_contents();
                // anything else could be interpreted as a local path or
                // another stream wrapper.
                if ($parts === false || !isset($parts['scheme'], $parts['host'])) {
                    return false;
                }
                if (!in_array(strtolower($parts['scheme']), ['http', 'https'], true)) {
                    return false;
                }
                return strtolower($parts['host']) !== 'twitter.com';
            },
            ARRAY_FILTER_USE_KEY
        );

        $items = [];
        foreach ($links as $link => $tweet) {
            $item = new \stdClass();
            $item->title = isset($tweet->text) ? $tweet->text : '';
            $item->time = isset($tweet->created_at) ? $tweet->created_at : null;
            $item->link = $link;
            if (isset($tweet->user->screen_name)) {
                $item->author = $tweet->user->screen_name;
            }
            $item->content = $this->_getLinkMetadata($this->_getLinkContent($link));
            $items[] = $item;
        }
        return $items;
    }

    /**
     * Returns the body of the linked page, using an on-disk cache.
     *
     * The cache file name is the cache path template filled with
     * $this->_feed, followed by a dot and the md5 of the link. A failed
     * download is NOT written to the cache, so it is retried on the next
     * call instead of being stored as an empty document.
     *
     * @param string $link absolute URL to download
     * @return string page content, or an empty string when it could not be
     *                obtained
     */
    protected function _getLinkContent($link) {
        $cacheFile = sprintf($this->_getCachePath() . '.%s', $this->_feed, md5($link));

        if (file_exists($cacheFile)) {
            $cached = file_get_contents($cacheFile);
            if ($cached !== false) {
                return $cached;
            }
        }

        $content = file_get_contents($link);
        if ($content === false) {
            return '';
        }
        file_put_contents($cacheFile, $content);
        return $content;
    }

    /**
     * Extracts <meta property="..." content="..."> pairs from an HTML page.
     *
     * An og:url entry is removed unless its host is motorsportmagazine.com
     * or one of its subdomains, so that a foreign canonical URL is never
     * used as the item link.
     *
     * @param string $content HTML document
     * @return array map of meta property name => content value
     */
    protected function _getLinkMetadata($content) {
        // Nothing to parse (e.g. the download failed).
        if (!is_string($content) || trim($content) === '') {
            return [];
        }

        $metadata = [];
        foreach (htmlqp($content)->find('meta') as $tag) {
            $attributes = $tag->attr();
            if (isset($attributes['property'], $attributes['content'])) {
                $metadata[$attributes['property']] = $attributes['content'];
            }
        }

        if (isset($metadata['og:url'])) {
            $host = parse_url($metadata['og:url'], PHP_URL_HOST);
            if (!$this->_isArticleHost($host)) {
                unset($metadata['og:url']);
            }
        }
        return $metadata;
    }

    /**
     * Tells whether a host is motorsportmagazine.com or a subdomain of it.
     *
     * A plain suffix comparison would also accept look-alike hosts such as
     * "evilmotorsportmagazine.com", hence the exact / dot-delimited match.
     *
     * @param mixed $host host name as returned by parse_url(), may be
     *                    null or false
     * @return bool
     */
    private function _isArticleHost($host) {
        if (!is_string($host) || $host === '') {
            return false;
        }
        $host = strtolower($host);
        $domain = 'motorsportmagazine.com';
        $suffix = '.' . $domain;
        return $host === $domain || substr($host, -strlen($suffix)) === $suffix;
    }

    /**
     * Keeps only the items whose page metadata carries
     * article:published_time. Array keys are preserved.
     *
     * @param array $items raw items as produced by _fetchItems()
     * @return array
     */
    protected function _spamFilter($items) {
        return array_filter(
            $items,
            function ($item) {
                return isset($item->content['article:published_time']);
            }
        );
    }

    /**
     * Converts raw items into Item objects.
     *
     * When the page declares article:published_time, its Open Graph title,
     * description and publication time are used; otherwise the tweet's text
     * and time are used, with og:title (when present) as the item text.
     * og:url, when present, replaces the tweeted link and determines the ID.
     *
     * @param array $content raw items as produced by _fetchItems()
     * @return Item[]
     */
    protected function _mapItems($content) {
        $items = [];
        foreach ($content as $i) {
            $meta = (isset($i->content) && is_array($i->content)) ? $i->content : [];
            $isArticle = isset($meta['article:published_time']);
            $ogTitle = isset($meta['og:title']) ? $meta['og:title'] : null;
            $ogDescription = isset($meta['og:description']) ? $meta['og:description'] : null;
            $url = isset($meta['og:url']) ? $meta['og:url'] : $i->link;

            $item = new Item();
            $item->ID = md5($url);
            // An article page without og:title falls back to the tweet text
            // instead of producing an untitled item.
            $item->Title = ($isArticle && $ogTitle !== null) ? $ogTitle : $i->title;
            $item->Link = $url;
            $item->Text = $isArticle ? $ogDescription : $ogTitle;
            $item->Time = $isArticle ? $meta['article:published_time'] : $i->time;
            if (isset($i->author)) {
                $item->Author = $i->author;
            }
            $items[] = $item;
        }
        return $items;
    }

    /**
     * Returns the content unchanged, i.e. applies no ordering of its own.
     *
     * @param mixed $content
     * @return mixed the same value that was passed in
     */
    protected function _sortContent($content) {
        return $content;
    }

}