summaryrefslogtreecommitdiff
path: root/providers
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2017-01-18 20:07:16 +0100
committer emkael <emkael@tlen.pl> 2017-01-18 20:07:16 +0100
commit 9a9c04512e5dcb77c7fe5d850e3f2a0250cc160e (patch)
tree fed46b5f4c2ed3a050bb1a7ad7c6d0a3ea844d55 /providers
parent c5bcf8f74fb80b7e163663845b0d6e35cabface3 (diff)
* Motor Sport Magazine feed provider
Diffstat (limited to 'providers')
-rw-r--r-- providers/Motorsport.php 116
1 files changed, 116 insertions, 0 deletions
diff --git a/providers/Motorsport.php b/providers/Motorsport.php
new file mode 100644
index 0000000..f055ad6
--- /dev/null
+++ b/providers/Motorsport.php
@@ -0,0 +1,116 @@
+<?php
+
+namespace Providers;
+
+require_once('Twitter.php');
+require_once('Item.php');
+require_once('../lib/querypath/src/qp.php');
+
+/**
+ * Feed provider for Motor Sport Magazine.
+ *
+ * Takes the items produced by the parent Twitter provider, collects the
+ * external links they mention, downloads each linked page (with a simple
+ * file cache) and builds feed items from the page's <meta property="...">
+ * tags (Open Graph / article metadata).
+ */
+class Motorsport extends \Providers\Twitter {
+
+    /**
+     * Cache lifetime setting. Not read anywhere in this class; presumably
+     * consumed by the parent provider -- confirm there.
+     */
+    protected $_cacheTimeout = '15 minutes';
+
+    /**
+     * @param mixed $feed    Passed through unchanged to the parent constructor.
+     * @param array $options Passed through unchanged to the parent constructor.
+     */
+    public function __construct($feed, $options=[]) {
+        parent::__construct($feed, $options);
+    }
+
+    /**
+     * @return string sprintf() template for cache file paths; the first %s
+     *                is filled with $this->_feed by _getLinkContent().
+     */
+    protected function _getCachePath() {
+        return '../cache/motorsport.%s';
+    }
+
+    /**
+     * Turns the parent's items into one entry per distinct external link.
+     *
+     * Each entry carries the tweet text, creation time, optional author,
+     * the link itself and the metadata scraped from the linked page.
+     *
+     * @return \stdClass[]
+     */
+    protected function _fetchItems() {
+        $tweets = parent::_fetchItems();
+
+        // Keyed by URL so a link mentioned several times is only fetched
+        // once; the last item mentioning it wins.
+        $links = [];
+        foreach ($tweets as $tweet) {
+            if (!isset($tweet->entities->urls)) {
+                continue;
+            }
+            foreach ($tweet->entities->urls as $url) {
+                if (isset($url->expanded_url) && $this->_isFetchableLink($url->expanded_url)) {
+                    $links[$url->expanded_url] = $tweet;
+                }
+            }
+        }
+
+        $content = [];
+        foreach ($links as $link => $tweet) {
+            $item = new \stdClass();
+            $item->title = $tweet->text;
+            $item->time = $tweet->created_at;
+            $item->link = $link;
+            if (isset($tweet->user)) {
+                $item->author = $tweet->user->screen_name;
+            }
+            $item->content = $this->_getLinkMetadata($this->_getLinkContent($link));
+            $content[] = $item;
+        }
+        return $content;
+    }
+
+    /**
+     * Decides whether a link found in a tweet should be downloaded.
+     *
+     * Only absolute http(s) URLs are accepted: the link is later handed to
+     * file_get_contents(), which would treat a scheme-less string as a
+     * local file path. Links pointing at twitter.com itself are skipped.
+     *
+     * @param mixed $link
+     * @return bool
+     */
+    protected function _isFetchableLink($link) {
+        if (!is_string($link)) {
+            return false;
+        }
+        $parts = parse_url($link);
+        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
+            return false;
+        }
+        if (!in_array(strtolower($parts['scheme']), ['http', 'https'], true)) {
+            return false;
+        }
+        return strtolower($parts['host']) !== 'twitter.com';
+    }
+
+    /**
+     * Returns the body of the linked page, using a per-link cache file.
+     *
+     * The cache file name is the cache path template filled with
+     * $this->_feed, followed by the MD5 hash of the link.
+     *
+     * @param string $link
+     * @return string|false Page body, or false when the download failed.
+     */
+    protected function _getLinkContent($link) {
+        $cacheFile = sprintf($this->_getCachePath() . '.%s', $this->_feed, md5($link));
+        if (file_exists($cacheFile)) {
+            return file_get_contents($cacheFile);
+        }
+        $content = file_get_contents($link);
+        if ($content !== false) {
+            // Only successful downloads are cached; caching a failure would
+            // leave an empty file that is served on every later request.
+            file_put_contents($cacheFile, $content);
+        }
+        return $content;
+    }
+
+    /**
+     * Extracts <meta property="..." content="..."> pairs from an HTML page.
+     *
+     * An og:url entry is dropped unless its host is motorsportmagazine.com
+     * or one of its subdomains.
+     *
+     * @param string|false $content HTML source; false or blank input yields
+     *                              an empty result without parsing.
+     * @return array Map of meta property name to its content value.
+     */
+    protected function _getLinkMetadata($content) {
+        if (!is_string($content) || trim($content) === '') {
+            return [];
+        }
+        $metadata = [];
+        foreach (htmlqp($content)->find('meta') as $tag) {
+            $attributes = $tag->attr();
+            if (isset($attributes['property'], $attributes['content'])) {
+                $metadata[$attributes['property']] = $attributes['content'];
+            }
+        }
+        if (isset($metadata['og:url']) && !$this->_isMotorsportUrl($metadata['og:url'])) {
+            unset($metadata['og:url']);
+        }
+        return $metadata;
+    }
+
+    /**
+     * Checks that a URL's host is motorsportmagazine.com or a subdomain.
+     *
+     * A plain suffix comparison would also accept unrelated hosts that
+     * merely end with the same characters, hence the explicit dot.
+     *
+     * @param mixed $url
+     * @return bool
+     */
+    protected function _isMotorsportUrl($url) {
+        if (!is_string($url)) {
+            return false;
+        }
+        $host = parse_url($url, PHP_URL_HOST);
+        if (!is_string($host) || $host === '') {
+            return false;
+        }
+        $host = strtolower($host);
+        $domain = 'motorsportmagazine.com';
+        $suffix = '.' . $domain;
+        return $host === $domain || substr($host, -strlen($suffix)) === $suffix;
+    }
+
+    /**
+     * Keeps only the items whose page metadata has article:published_time.
+     *
+     * @param array $items
+     * @return array Filtered items; original array keys are preserved.
+     */
+    protected function _spamFilter($items) {
+        return array_filter(
+            $items,
+            function ($item) {
+                return isset($item->content['article:published_time']);
+            }
+        );
+    }
+
+    /**
+     * Converts the entries built by _fetchItems() into Item objects.
+     *
+     * When the page declares article:published_time, title, text and time
+     * come from the page metadata; otherwise they come from the tweet,
+     * with og:title (if any) as the text. A missing og:title on an article
+     * falls back to the tweet text; a missing text value is left null.
+     *
+     * @param array $content
+     * @return Item[]
+     */
+    protected function _mapItems($content) {
+        $items = [];
+        foreach ($content as $i) {
+            $meta = (isset($i->content) && is_array($i->content)) ? $i->content : [];
+            $ogTitle = isset($meta['og:title']) ? $meta['og:title'] : null;
+            $url = isset($meta['og:url']) ? $meta['og:url'] : $i->link;
+
+            if (isset($meta['article:published_time'])) {
+                $title = ($ogTitle !== null) ? $ogTitle : $i->title;
+                $text = isset($meta['og:description']) ? $meta['og:description'] : null;
+                $time = $meta['article:published_time'];
+            } else {
+                $title = $i->title;
+                $text = $ogTitle;
+                $time = $i->time;
+            }
+
+            $item = new Item();
+            $item->ID = md5($url);
+            $item->Title = $title;
+            $item->Link = $url;
+            $item->Text = $text;
+            $item->Time = $time;
+            if (isset($i->author)) {
+                $item->Author = $i->author;
+            }
+            $items[] = $item;
+        }
+        return $items;
+    }
+
+    /**
+     * Returns the content unchanged, i.e. in the order it was given.
+     *
+     * @param array $content
+     * @return array
+     */
+    protected function _sortContent($content) {
+        return $content;
+    }
+
+}
+
+?>