summary | refs | log | tree | commit | diff
path: root/vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php
diff options
context:
space:
mode:
author: Frédéric Guillot <fred@kanboard.net> 2018-02-02 15:39:47 -0800
committer: Frédéric Guillot <fred@kanboard.net> 2018-02-02 15:39:47 -0800
commit: bab2fb3899cc243e2f67ccf787f3657b250f6e61 (patch)
tree: 07ddb755330b320fbd2e2803fdb978cfff3bd452 /vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php
parent: 5c4d06d26b808ea50d08f83ae02ac82373fd2208 (diff)
Remove dependency on PicoFeed
Diffstat (limited to 'vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php')
-rw-r--r-- vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php | 102
1 files changed, 0 insertions, 102 deletions
diff --git a/vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php b/vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php
deleted file mode 100644
index 9beb59c1..00000000
--- a/vendor/miniflux/picofeed/lib/PicoFeed/Scraper/RuleParser.php
+++ /dev/null
@@ -1,102 +0,0 @@
-<?php
-
-namespace PicoFeed\Scraper;
-
-use DOMXPath;
-use PicoFeed\Parser\XmlParser;
-
-/**
- * Rule Parser.
- *
- * @author Frederic Guillot
- */
-class RuleParser implements ParserInterface
-{
- private $dom;
- private $xpath;
- private $rules = array();
-
- /**
- * Constructor.
- *
- * @param string $html
- * @param array $rules
- */
- public function __construct($html, array $rules)
- {
- $this->rules = $rules;
- $this->dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$html);
- $this->xpath = new DOMXPath($this->dom);
- }
-
- /**
- * Get the relevant content with predefined rules.
- *
- * @return string
- */
- public function execute()
- {
- $this->stripTags();
-
- return $this->findContent();
- }
-
- /**
- * Remove HTML tags.
- */
- public function stripTags()
- {
- if (isset($this->rules['strip']) && is_array($this->rules['strip'])) {
- foreach ($this->rules['strip'] as $pattern) {
- $nodes = $this->xpath->query($pattern);
-
- if ($nodes !== false && $nodes->length > 0) {
- foreach ($nodes as $node) {
- $node->parentNode->removeChild($node);
- }
- }
- }
- }
- }
-
- /**
- * Fetch content based on Xpath rules.
- */
- public function findContent()
- {
- $content = '';
- if (isset($this->rules['body']) && is_array($this->rules['body'])) {
- foreach ($this->rules['body'] as $pattern) {
- $nodes = $this->xpath->query($pattern);
-
- if ($nodes !== false && $nodes->length > 0) {
- foreach ($nodes as $node) {
- $content .= $this->dom->saveXML($node);
- }
- }
- }
- }
-
- return $content;
- }
-
- /**
- * Fetch next link based on Xpath rules.
- *
- * @return string
- */
- public function findNextLink()
- {
- if (isset($this->rules['next_page']) && is_array($this->rules['next_page'])) {
- foreach ($this->rules['next_page'] as $pattern) {
- $nodes = $this->xpath->query($pattern);
- if ($nodes !== false && $nodes->length > 0) {
- foreach ($nodes as $node) {
- return $node->getAttribute('href');
- }
- }
- }
- }
- return null;
- }
-}