blob: f055ad66ece6337eeaa4c79cdc41001da3001a22 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
<?php
namespace Providers;
require_once('Twitter.php');
require_once('Item.php');
require_once('../lib/querypath/src/qp.php');
/**
 * Twitter-backed provider that turns tweets into items describing the
 * external pages those tweets link to.
 *
 * For every non-twitter.com link found in the fetched tweets, the linked page
 * is downloaded (with an on-disk cache), its <meta property="..."> tags are
 * collected, and the result is mapped onto Item objects.
 */
class Motorsport extends \Providers\Twitter {

    /**
     * Cache lifetime. Not read anywhere in this class; presumably consumed by
     * the parent provider -- confirm against \Providers\Twitter.
     */
    protected $_cacheTimeout = '15 minutes';

    /**
     * @param mixed $feed    Feed identifier, forwarded to the parent unchanged.
     * @param array $options Provider options, forwarded to the parent unchanged.
     */
    public function __construct($feed, $options=[]) {
        parent::__construct($feed, $options);
    }

    /**
     * Returns the sprintf() pattern for cache file names; the %s placeholder
     * is filled with the feed name (see _getLinkContent()).
     *
     * @return string
     */
    protected function _getCachePath() {
        return '../cache/motorsport.%s';
    }

    /**
     * Fetches tweets through the parent and converts every external link
     * they contain into a plain item carrying the tweet data plus the linked
     * page's metadata.
     *
     * @return \stdClass[] Objects with title, time, link, content (metadata
     *                     array) and, when the tweet has a user, author.
     */
    protected function _fetchItems() {
        $tweets = parent::_fetchItems();
        if (!is_array($tweets) && !($tweets instanceof \Traversable)) {
            return [];
        }

        // Keyed by expanded URL so a link shared by several tweets is only
        // processed once (the last tweet seen wins).
        $links = [];
        foreach ($tweets as $tweet) {
            if (!isset($tweet->entities->urls)) {
                continue;
            }
            foreach ($tweet->entities->urls as $url) {
                if (empty($url->expanded_url)) {
                    continue;
                }
                $links[$url->expanded_url] = $tweet;
            }
        }

        $links = array_filter(
            $links,
            function ($link) {
                $parts = parse_url((string) $link);
                if ($parts === false || !isset($parts['scheme'], $parts['host'])) {
                    return false;
                }
                // The link is later handed to file_get_contents(); only allow
                // real web URLs so a stray value can never address a local file.
                if (!in_array(strtolower($parts['scheme']), ['http', 'https'], true)) {
                    return false;
                }
                return strtolower($parts['host']) !== 'twitter.com';
            },
            ARRAY_FILTER_USE_KEY
        );

        $content = [];
        foreach ($links as $link => $tweet) {
            $item = new \stdClass();
            $item->title = $tweet->text;
            $item->time = $tweet->created_at;
            $item->link = $link;
            if (isset($tweet->user)) {
                $item->author = $tweet->user->screen_name;
            }
            $item->content = $this->_getLinkMetadata($this->_getLinkContent($link));
            $content[] = $item;
        }
        return $content;
    }

    /**
     * Returns the body of the given URL, using a per-feed, per-URL cache file.
     *
     * @param string $link URL to download.
     * @return string|false Page body, or false when the download failed.
     */
    protected function _getLinkContent($link) {
        $cacheFile = sprintf($this->_getCachePath() . '.%s', $this->_feed, md5($link));

        if (is_file($cacheFile)) {
            $cached = file_get_contents($cacheFile);
            // An empty cache file is treated as a miss: it is what a failed
            // download used to leave behind, so retry instead of serving it.
            if ($cached !== false && $cached !== '') {
                return $cached;
            }
        }

        $content = file_get_contents($link);
        if ($content === false) {
            // Do not cache failures, otherwise a transient error would stick.
            return false;
        }
        file_put_contents($cacheFile, $content);
        return $content;
    }

    /**
     * Extracts <meta property="..." content="..."> pairs from an HTML document.
     *
     * An og:url entry is dropped unless its host is motorsportmagazine.com or
     * one of its subdomains.
     *
     * @param string|false $content HTML source as returned by _getLinkContent().
     * @return array Map of meta property name => content value; empty when
     *               there is nothing to parse.
     */
    protected function _getLinkMetadata($content) {
        if (!is_string($content) || trim($content) === '') {
            return [];
        }

        $tree = htmlqp($content);
        $metadata = [];
        foreach ($tree->find('meta') as $tag) {
            $attributes = $tag->attr();
            if (isset($attributes['property'], $attributes['content'])) {
                $metadata[$attributes['property']] = $attributes['content'];
            }
        }

        if (isset($metadata['og:url'])) {
            $host = parse_url($metadata['og:url'], PHP_URL_HOST);
            if (!is_string($host) || !$this->_isMotorsportHost($host)) {
                unset($metadata['og:url']);
            }
        }
        return $metadata;
    }

    /**
     * Tells whether a host name is motorsportmagazine.com or a subdomain of it.
     *
     * A bare suffix comparison would also accept look-alike hosts such as
     * "notmotorsportmagazine.com", hence the explicit dot boundary.
     *
     * @param string $host Host name to test.
     * @return bool
     */
    private function _isMotorsportHost($host) {
        $domain = 'motorsportmagazine.com';
        $host = strtolower($host);
        if ($host === $domain) {
            return true;
        }
        $suffix = '.' . $domain;
        return substr($host, -strlen($suffix)) === $suffix;
    }

    /**
     * Keeps only the items whose metadata contains article:published_time.
     * Array keys of the input are preserved.
     *
     * @param array $items Items as produced by _fetchItems().
     * @return array
     */
    protected function _spamFilter($items) {
        return array_filter(
            $items,
            function ($item) {
                return isset($item->content['article:published_time']);
            }
        );
    }

    /**
     * Converts fetched items into Item objects.
     *
     * Items with article:published_time metadata take title, text and time
     * from the page metadata; all others use the tweet's title and time with
     * og:title as text. Missing metadata values yield null, except for the
     * title, which falls back to the tweet's title.
     *
     * @param array $content Items as produced by _fetchItems().
     * @return Item[]
     */
    protected function _mapItems($content) {
        $items = [];
        foreach ($content as $i) {
            $meta = (isset($i->content) && is_array($i->content)) ? $i->content : [];
            $isArticle = isset($meta['article:published_time']);
            $ogTitle = isset($meta['og:title']) ? $meta['og:title'] : null;
            $ogDescription = isset($meta['og:description']) ? $meta['og:description'] : null;
            $url = isset($meta['og:url']) ? $meta['og:url'] : $i->link;

            $item = new Item();
            $item->ID = md5($url);
            $item->Title = ($isArticle && $ogTitle !== null) ? $ogTitle : $i->title;
            $item->Link = $url;
            $item->Text = $isArticle ? $ogDescription : $ogTitle;
            $item->Time = $isArticle ? $meta['article:published_time'] : $i->time;
            if (isset($i->author)) {
                $item->Author = $i->author;
            }
            $items[] = $item;
        }
        return $items;
    }

    /**
     * Returns the content unchanged, i.e. no reordering is applied here.
     *
     * @param array $content
     * @return array
     */
    protected function _sortContent($content) {
        return $content;
    }
}
?>
|