From 9a9c04512e5dcb77c7fe5d850e3f2a0250cc160e Mon Sep 17 00:00:00 2001
From: emkael
Date: Wed, 18 Jan 2017 20:07:16 +0100
Subject: * Motor Sport Magazine feed provider

---
 providers/Motorsport.php | 116 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 providers/Motorsport.php

(limited to 'providers')

diff --git a/providers/Motorsport.php b/providers/Motorsport.php
new file mode 100644
index 0000000..f055ad6
--- /dev/null
+++ b/providers/Motorsport.php
@@ -0,0 +1,116 @@
+entities->urls as $url) {
+                $links[$url->expanded_url] = $item;
+            }
+        }
+        $links = array_filter(
+            $links,
+            function($link) {
+                $linkParts = parse_url($link);
+                return $linkParts['host'] != 'twitter.com';
+            },
+            ARRAY_FILTER_USE_KEY
+        );
+        $content = [];
+        foreach ($links as $link => $i) {
+            $item = new \stdClass();
+            $item->title = $i->text;
+            $item->time = $i->created_at;
+            $item->link = $link;
+            if (isset($i->user)) {
+                $item->author = $i->user->screen_name;
+            }
+            $item->content = $this->_getLinkMetadata($this->_getLinkContent($link));
+            $content[] = $item;
+        }
+        return $content;
+    }
+
+    /**
+     * Returns the body of a linked page, served from an on-disk cache when
+     * a cache file for the link already exists.
+     *
+     * The cache file name is built with sprintf() from _getCachePath()
+     * followed by '.%s', filled with the feed name and the MD5 of the link
+     * (so _getCachePath() presumably contains a placeholder for the feed
+     * name - confirm against the base class).
+     *
+     * @param string $link URL of the page to fetch
+     * @return string|false page body, or false when it could not be read
+     */
+    protected function _getLinkContent($link) {
+        $cacheHash = md5($link);
+        $cacheFile = sprintf($this->_getCachePath() . '.%s', $this->_feed, $cacheHash);
+        if (file_exists($cacheFile)) {
+            return file_get_contents($cacheFile);
+        }
+        $content = file_get_contents($link);
+        // Cache successful downloads only: writing `false` would create an
+        // empty cache file and make a temporary failure permanent.
+        if ($content !== false) {
+            file_put_contents($cacheFile, $content);
+        }
+        return $content;
+    }
+
+    /**
+     * Collects the "property" => "content" pairs of all <meta> tags found
+     * in an HTML document.
+     *
+     * An og:url entry whose host is not motorsportmagazine.com (or one of
+     * its subdomains) is removed from the result.
+     *
+     * @param string|false $content HTML markup, as returned by _getLinkContent()
+     * @return array map of meta property name => meta content value
+     */
+    protected function _getLinkMetadata($content) {
+        $metadata = [];
+        // Nothing to parse when the download failed or the body is empty.
+        if ($content === false || $content === '') {
+            return $metadata;
+        }
+        $tree = htmlqp($content);
+        foreach ($tree->find('meta') as $tag) {
+            $attributes = $tag->attr();
+            // Tags lacking either attribute are skipped rather than read
+            // through an undefined index.
+            if (isset($attributes['property'], $attributes['content'])) {
+                $metadata[$attributes['property']] = $attributes['content'];
+            }
+        }
+        if (isset($metadata['og:url']) && !$this->_isMotorsportUrl($metadata['og:url'])) {
+            unset($metadata['og:url']);
+        }
+        return $metadata;
+    }
+
+    /**
+     * Tells whether a URL points at motorsportmagazine.com or a subdomain.
+     *
+     * The host is compared case-insensitively and on a domain boundary, so
+     * a look-alike such as "evilmotorsportmagazine.com" is rejected, as are
+     * URLs without a host component.
+     *
+     * @param string $url URL to check
+     * @return bool
+     */
+    protected function _isMotorsportUrl($url) {
+        $domain = 'motorsportmagazine.com';
+        // parse_url() yields null for a missing host and false for a
+        // malformed URL; neither can match.
+        $host = parse_url($url, PHP_URL_HOST);
+        if (!is_string($host)) {
+            return false;
+        }
+        $host = strtolower($host);
+        $suffix = '.' . $domain;
+        return $host === $domain || substr($host, -strlen($suffix)) === $suffix;
+    }
+
+    /**
+     * Keeps only the items whose link metadata contains an
+     * article:published_time entry. Array keys are preserved.
+     *
+     * @param array $items items carrying a `content` metadata array
+     * @return array filtered items
+     */
+    protected function _spamFilter($items) {
+        return array_filter(
+            $items,
+            function ($item) {
+                return isset($item->content['article:published_time']);
+            }
+        );
+    }
+
+    /**
+     * Converts the collected link objects into Item instances.
+     *
+     * When the metadata has article:published_time, the title, text and
+     * time come from og:title, og:description and article:published_time.
+     * Otherwise the title and time come from the source entry and the text
+     * is og:title. The link is og:url when present, else the entry's own
+     * link; the ID is the MD5 of that link.
+     *
+     * @param array $content objects with title, time, link, content and,
+     *                       optionally, author properties
+     * @return array list of Item objects
+     */
+    protected function _mapItems($content) {
+        $items = [];
+        foreach ($content as $i) {
+            $meta = $i->content;
+            $isArticle = isset($meta['article:published_time']);
+            // Optional tags are read through isset() so that a page
+            // without them does not trigger undefined-index notices.
+            $ogTitle = isset($meta['og:title']) ? $meta['og:title'] : null;
+            $ogDescription = isset($meta['og:description']) ? $meta['og:description'] : null;
+            $url = isset($meta['og:url']) ? $meta['og:url'] : $i->link;
+            $item = new Item();
+            $item->ID = md5($url);
+            // An article without og:title falls back to the entry's own
+            // title instead of ending up with a null one.
+            $item->Title = ($isArticle && $ogTitle !== null) ? $ogTitle : $i->title;
+            $item->Link = $url;
+            $item->Text = $isArticle ? $ogDescription : $ogTitle;
+            $item->Time = $isArticle ? $meta['article:published_time'] : $i->time;
+            if (isset($i->author)) {
+                $item->Author = $i->author;
+            }
+            $items[] = $item;
+        }
+        return $items;
+    }
+
+    /**
+     * Returns the content unchanged, i.e. in its incoming order.
+     *
+     * @param array $content
+     * @return array the same content
+     */
+    protected function _sortContent($content) {
+        return $content;
+    }
+
+}
+
+?>
-- 
cgit v1.2.3