diff options
author | Frédéric Guillot <fred@kanboard.net> | 2018-02-02 15:39:47 -0800 |
---|---|---|
committer | Frédéric Guillot <fred@kanboard.net> | 2018-02-02 15:39:47 -0800 |
commit | bab2fb3899cc243e2f67ccf787f3657b250f6e61 (patch) | |
tree | 07ddb755330b320fbd2e2803fdb978cfff3bd452 /vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php | |
parent | 5c4d06d26b808ea50d08f83ae02ac82373fd2208 (diff) |
Remove dependency on PicoFeed
Diffstat (limited to 'vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php')
-rw-r--r-- | vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php | 102 |
1 files changed, 0 insertions, 102 deletions
```diff
diff --git a/vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php b/vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php
deleted file mode 100644
index 9beb59c1..00000000
--- a/vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php
+++ /dev/null
@@ -1,102 +0,0 @@
-<?php
-
-namespace PicoFeed\Scraper;
-
-use DOMXPath;
-use PicoFeed\Parser\XmlParser;
-
-/**
- * Rule Parser.
- *
- * @author Frederic Guillot
- */
-class RuleParser implements ParserInterface
-{
-    private $dom;
-    private $xpath;
-    private $rules = array();
-
-    /**
-     * Constructor.
-     *
-     * @param string $html
-     * @param array $rules
-     */
-    public function __construct($html, array $rules)
-    {
-        $this->rules = $rules;
-        $this->dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$html);
-        $this->xpath = new DOMXPath($this->dom);
-    }
-
-    /**
-     * Get the relevant content with predefined rules.
-     *
-     * @return string
-     */
-    public function execute()
-    {
-        $this->stripTags();
-
-        return $this->findContent();
-    }
-
-    /**
-     * Remove HTML tags.
-     */
-    public function stripTags()
-    {
-        if (isset($this->rules['strip']) && is_array($this->rules['strip'])) {
-            foreach ($this->rules['strip'] as $pattern) {
-                $nodes = $this->xpath->query($pattern);
-
-                if ($nodes !== false && $nodes->length > 0) {
-                    foreach ($nodes as $node) {
-                        $node->parentNode->removeChild($node);
-                    }
-                }
-            }
-        }
-    }
-
-    /**
-     * Fetch content based on Xpath rules.
-     */
-    public function findContent()
-    {
-        $content = '';
-        if (isset($this->rules['body']) && is_array($this->rules['body'])) {
-            foreach ($this->rules['body'] as $pattern) {
-                $nodes = $this->xpath->query($pattern);
-
-                if ($nodes !== false && $nodes->length > 0) {
-                    foreach ($nodes as $node) {
-                        $content .= $this->dom->saveXML($node);
-                    }
-                }
-            }
-        }
-
-        return $content;
-    }
-
-    /**
-     * Fetch next link based on Xpath rules.
-     *
-     * @return string
-     */
-    public function findNextLink()
-    {
-        if (isset($this->rules['next_page']) && is_array($this->rules['next_page'])) {
-            foreach ($this->rules['next_page'] as $pattern) {
-                $nodes = $this->xpath->query($pattern);
-                if ($nodes !== false && $nodes->length > 0) {
-                    foreach ($nodes as $node) {
-                        return $node->getAttribute('href');
-                    }
-                }
-            }
-        }
-        return null;
-    }
-}
```