summaryrefslogtreecommitdiff
path: root/providers/Motorsport.php
blob: f055ad66ece6337eeaa4c79cdc41001da3001a22 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
<?php

namespace Providers;

require_once('Twitter.php');
require_once('Item.php');
require_once('../lib/querypath/src/qp.php');

/**
 * Feed provider that turns links shared on a Twitter timeline into feed items
 * enriched with the linked page's `<meta property="...">` tags.
 *
 * The items come from the parent Twitter provider; every external HTTP(S) link
 * found in them is downloaded (with an on-disk cache) and its metadata is used
 * to build the final Item objects. An `og:url` is only trusted when it points
 * at motorsportmagazine.com.
 */
class Motorsport extends \Providers\Twitter {

    /**
     * Cache lifetime setting. It is not read in this class; presumably the
     * parent provider consumes it - confirm there.
     */
    protected $_cacheTimeout = '15 minutes';

    /** Host whose canonical (`og:url`) links are trusted. */
    const TRUSTED_DOMAIN = 'motorsportmagazine.com';

    /**
     * @param mixed $feed    Feed identifier, forwarded unchanged to the parent.
     * @param array $options Provider options, forwarded unchanged to the parent.
     */
    public function __construct($feed, $options=[]) {
        parent::__construct($feed, $options);
    }

    /**
     * Returns the sprintf() pattern for cache file names; the `%s` placeholder
     * is filled with the feed name.
     *
     * @return string
     */
    protected function _getCachePath() {
        return '../cache/motorsport.%s';
    }

    /**
     * Fetches the parent provider's items and converts every external link
     * they contain into one intermediate item.
     *
     * The parent items are presumably tweet objects from the Twitter API; they
     * are read via `entities->urls[*]->expanded_url`, `text`, `created_at` and
     * `user->screen_name`. When several items share the same link, the last one
     * wins (links are de-duplicated by URL).
     *
     * @return \stdClass[] Items with `title`, `time`, `link`, `content`
     *                     (metadata array) and optionally `author`.
     */
    protected function _fetchItems() {
        $tweets = parent::_fetchItems();
        if (!is_array($tweets) && !($tweets instanceof \Traversable)) {
            return [];
        }

        $links = [];
        foreach ($tweets as $tweet) {
            // Items without URL entities simply carry no links.
            if (!isset($tweet->entities->urls)) {
                continue;
            }
            foreach ($tweet->entities->urls as $url) {
                if (!isset($url->expanded_url) || !is_string($url->expanded_url)) {
                    continue;
                }
                if ($this->_isExternalLink($url->expanded_url)) {
                    $links[$url->expanded_url] = $tweet;
                }
            }
        }

        $content = [];
        foreach ($links as $link => $tweet) {
            $item = new \stdClass();
            $item->title = $tweet->text;
            $item->time = $tweet->created_at;
            $item->link = $link;
            if (isset($tweet->user->screen_name)) {
                $item->author = $tweet->user->screen_name;
            }
            $item->content = $this->_getLinkMetadata($this->_getLinkContent($link));
            $content[] = $item;
        }
        return $content;
    }

    /**
     * Tells whether a link is worth downloading: an absolute http/https URL
     * whose host is not twitter.com.
     *
     * Requiring an explicit web scheme and host also keeps local paths and
     * other stream wrappers away from file_get_contents().
     *
     * @param string $link
     * @return bool
     */
    private function _isExternalLink($link) {
        $parts = parse_url($link);
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return false;
        }
        if (!in_array(strtolower($parts['scheme']), ['http', 'https'], true)) {
            return false;
        }
        // Host names are case-insensitive.
        return strtolower($parts['host']) !== 'twitter.com';
    }

    /**
     * Returns the body of the linked page, using a per-link cache file named
     * after the feed and the MD5 hash of the link.
     *
     * Failed downloads are NOT cached, so a transient network error does not
     * permanently replace the page with an empty cache entry.
     *
     * @param string $link
     * @return string|false Page content, or false when the download failed.
     */
    protected function _getLinkContent($link) {
        $cacheFile = sprintf($this->_getCachePath() . '.%s', $this->_feed, md5($link));

        if (is_file($cacheFile)) {
            $cached = file_get_contents($cacheFile);
            if ($cached !== false) {
                return $cached;
            }
            // Unreadable cache entry: fall through and download again.
        }

        $content = file_get_contents($link);
        if ($content === false) {
            return false;
        }
        file_put_contents($cacheFile, $content);
        return $content;
    }

    /**
     * Extracts `<meta property="..." content="...">` pairs from an HTML page.
     *
     * `og:url` is dropped unless its host is the trusted domain or one of its
     * subdomains, so that pages cannot redirect an item to a foreign URL.
     *
     * @param string|false $content HTML source; false/empty yields no metadata.
     * @return array Map of meta property name => content value.
     */
    protected function _getLinkMetadata($content) {
        $metadata = [];
        if (!is_string($content) || trim($content) === '') {
            return $metadata;
        }

        $tree = htmlqp($content);
        foreach ($tree->find('meta') as $tag) {
            $attributes = $tag->attr();
            // Only complete property/content pairs are meaningful.
            if (isset($attributes['property'], $attributes['content'])) {
                $metadata[$attributes['property']] = $attributes['content'];
            }
        }

        if (isset($metadata['og:url']) && !$this->_isTrustedUrl($metadata['og:url'])) {
            unset($metadata['og:url']);
        }
        return $metadata;
    }

    /**
     * Tells whether a URL's host is the trusted domain or a subdomain of it.
     *
     * The match is anchored on a dot boundary, so look-alike hosts such as
     * "evilmotorsportmagazine.com" are rejected.
     *
     * @param string $url
     * @return bool
     */
    private function _isTrustedUrl($url) {
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            return false;
        }
        $host = strtolower($host);
        $suffix = '.' . self::TRUSTED_DOMAIN;
        return $host === self::TRUSTED_DOMAIN
            || substr($host, -strlen($suffix)) === $suffix;
    }

    /**
     * Keeps only items whose linked page declares `article:published_time`.
     * Array keys of the input are preserved.
     *
     * @param array $items Items as produced by _fetchItems().
     * @return array
     */
    protected function _spamFilter($items) {
        return array_filter(
            $items,
            function ($item) {
                return isset($item->content['article:published_time']);
            }
        );
    }

    /**
     * Converts intermediate items into Item objects.
     *
     * For pages with `article:published_time` the page metadata (title,
     * description, publication time) is preferred; otherwise the values of the
     * originating item are used. Missing metadata entries yield null instead
     * of raising undefined-index notices, and a missing `og:title` falls back
     * to the original item's title.
     *
     * @param array $content Items as produced by _fetchItems().
     * @return Item[]
     */
    protected function _mapItems($content) {
        $items = [];
        foreach ($content as $i) {
            $meta = (isset($i->content) && is_array($i->content)) ? $i->content : [];
            $isArticle = isset($meta['article:published_time']);
            $ogTitle = isset($meta['og:title']) ? $meta['og:title'] : null;
            $ogDescription = isset($meta['og:description']) ? $meta['og:description'] : null;
            $url = isset($meta['og:url']) ? $meta['og:url'] : $i->link;

            $item = new Item();
            $item->ID = md5($url);
            $item->Title = ($isArticle && $ogTitle !== null) ? $ogTitle : $i->title;
            $item->Link = $url;
            $item->Text = $isArticle ? $ogDescription : $ogTitle;
            $item->Time = $isArticle ? $meta['article:published_time'] : $i->time;
            if (isset($i->author)) {
                $item->Author = $i->author;
            }
            $items[] = $item;
        }
        return $items;
    }

    /**
     * Returns the content unchanged, i.e. no re-sorting is applied here.
     *
     * @param array $content
     * @return array
     */
    protected function _sortContent($content) {
        return $content;
    }

}

?>