summaryrefslogtreecommitdiff
path: root/vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleLoader.php
blob: 6650682d12bd6497648d2052c374f84d9e87abec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
<?php

namespace PicoFeed\Scraper;

use PicoFeed\Base;
use PicoFeed\Logging\Logger;

/**
 * RuleLoader class.
 *
 * Finds and loads the scraper rule file that matches the hostname of a URL.
 * A rule file is a PHP file named after a hostname (or a part of it) that is
 * expected to return an array when included. Folders are searched in the
 * order given by getRulesFolders(), and inside each folder the candidate
 * names are tried in the order given by getRulesFileList().
 *
 * @author  Frederic Guillot
 * @author  Bernhard Posselt
 */
class RuleLoader extends Base
{
    /**
     * Get the rules for an URL.
     *
     * @param string $url the URL that should be looked up
     *
     * @return array the array containing the rules, or an empty array when
     *               the URL has no usable hostname or no rule file matches
     */
    public function getRules($url)
    {
        $hostname = parse_url($url, PHP_URL_HOST);

        // parse_url() returns false for a seriously malformed URL and null
        // when the URL simply has no host component (e.g. a relative path),
        // so both cases have to be rejected, not only false.
        if (!is_string($hostname) || $hostname === '') {
            return array();
        }

        // The hostname comes from an untrusted URL and ends up in an include
        // path: refuse anything that could step outside the rules folder.
        if (strpbrk($hostname, "/\\\0") !== false) {
            return array();
        }

        $files = $this->getRulesFileList($hostname);

        foreach ($this->getRulesFolders() as $folder) {
            $rule = $this->loadRuleFile($folder, $files);

            // An empty result means "nothing usable here": try the next folder
            if (!empty($rule)) {
                return $rule;
            }
        }

        return array();
    }

    /**
     * Get the list of possible rules file names for a given hostname.
     *
     * The names are ordered from the most specific to the least specific,
     * and do not include the ".php" extension.
     *
     * @param string $hostname Hostname
     *
     * @return array
     */
    public function getRulesFileList($hostname)
    {
        $files = array($hostname);                 // subdomain.domain.tld
        $parts = explode('.', $hostname);
        $len = count($parts);

        if ($len > 2) {
            // Only the first label is stripped, whatever the number of labels
            $subdomain = array_shift($parts);
            $parent = implode('.', $parts);

            $files[] = $parent;                    // domain.tld
            $files[] = '.'.$parent;                // .domain.tld
            $files[] = $subdomain;                 // subdomain
        } elseif ($len === 2) {
            $files[] = '.'.$hostname;              // .domain.tld
            $files[] = $parts[0];                  // domain
        }

        return $files;
    }

    /**
     * Load a rule file from the defined folder.
     *
     * The first candidate that exists as a regular file is included and its
     * return value is used; the remaining candidates are not looked at.
     *
     * @param string $folder Rule directory
     * @param array  $files  List of possible file names (without extension)
     *
     * @return array the rules, or an empty array when no file matches or the
     *               matching file does not return an array
     */
    public function loadRuleFile($folder, array $files)
    {
        foreach ($files as $file) {
            $filename = $folder.'/'.$file.'.php';

            // is_file() rather than file_exists(): a directory that happens
            // to be named "<host>.php" must not be passed to include.
            if (is_file($filename)) {
                Logger::setMessage(get_called_class().' Load rule: '.$file);

                $rules = include $filename;

                // include yields int(1) when the file has no return statement
                // and false on failure; keep the documented array contract.
                return is_array($rules) ? $rules : array();
            }
        }

        return array();
    }

    /**
     * Get the list of folders that contains rules.
     *
     * The folder defined in the configuration, when there is one, comes
     * first so that its rules take precedence over the bundled ones.
     *
     * @return array
     */
    public function getRulesFolders()
    {
        $folders = array();

        if ($this->config !== null) {
            $customFolder = $this->config->getGrabberRulesFolder();

            if ($customFolder !== null) {
                $folders[] = $customFolder;
            }
        }

        // Rules directory located next to this file's folder
        $folders[] = __DIR__ . '/../Rules';

        return $folders;
    }
}