From 9a9c04512e5dcb77c7fe5d850e3f2a0250cc160e Mon Sep 17 00:00:00 2001 From: emkael Date: Wed, 18 Jan 2017 20:07:16 +0100 Subject: * Motor Sport Magazine feed provider --- .../src/QueryPath/CSS/DOMTraverser/PseudoClass.php | 421 +++++++++++++++++++++ .../src/QueryPath/CSS/DOMTraverser/Util.php | 139 +++++++ 2 files changed, 560 insertions(+) create mode 100644 lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php create mode 100644 lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php (limited to 'lib/querypath/src/QueryPath/CSS/DOMTraverser') diff --git a/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php b/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php new file mode 100644 index 0000000..0bcaf79 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php @@ -0,0 +1,421 @@ +lang($node, $value); + case 'any-link': + return Util::matchesAttribute($node, 'href') + || Util::matchesAttribute($node, 'src') + || Util::matchesAttribute($node, 'link'); + case 'link': + return Util::matchesAttribute($node, 'href'); + case 'local-link': + return $this->isLocalLink($node); + case 'root': + return $node->isSameNode($node->ownerDocument->documentElement); + + // CSS 4 declares the :scope pseudo-class, which describes what was + // the :x-root QueryPath extension. + case 'x-root': + case 'x-reset': + case 'scope': + return $node->isSameNode($scope); + // NON-STANDARD extensions for simple support of even and odd. These + // are supported by jQuery, FF, and other user agents. + case 'even': + return $this->isNthChild($node, 'even'); + case 'odd': + return $this->isNthChild($node, 'odd'); + case 'nth-child': + return $this->isNthChild($node, $value); + case 'nth-last-child': + return $this->isNthChild($node, $value, TRUE); + case 'nth-of-type': + return $this->isNthChild($node, $value, FALSE, TRUE); + case 'nth-last-of-type': + return $this->isNthChild($node, $value, TRUE, TRUE); + case 'first-of-type': + return $this->isFirstOfType($node); + case 'last-of-type': + return $this->isLastOfType($node); + case 'only-of-type': + return $this->isFirstOfType($node) && $this->isLastOfType($node); + + // Additional pseudo-classes defined in jQuery: + case 'lt': + // I'm treating this as "less than or equal to". + $rule = sprintf('-n + %d', (int) $value); + // $rule = '-n+15'; + return $this->isNthChild($node, $rule); + case 'gt': + // I'm treating this as "greater than" + // return $this->nodePositionFromEnd($node) > (int) $value; + return $this->nodePositionFromStart($node) > (int) $value; + case 'nth': + case 'eq': + $rule = (int)$value; + return $this->isNthChild($node, $rule); + case 'first': + return $this->isNthChild($node, 1); + case 'first-child': + return $this->isFirst($node); + case 'last': + case 'last-child': + return $this->isLast($node); + case 'only-child': + return $this->isFirst($node) && $this->isLast($node); + case 'empty': + return $this->isEmpty($node); + case 'parent': + return !$this->isEmpty($node); + + case 'enabled': + case 'disabled': + case 'checked': + return Util::matchesAttribute($node, $name); + case 'text': + case 'radio': + case 'checkbox': + case 'file': + case 'password': + case 'submit': + case 'image': + case 'reset': + case 'button': + return Util::matchesAttribute($node, 'type', $name); + + case 'header': + return $this->header($node); + case 'has': + case 'matches': + return $this->has($node, $value); + break; + case 'not': + if (empty($value)) { + throw new ParseException(":not() requires a value."); + } + return $this->isNot($node, $value); + // Contains == text matches. + // In QP 2.1, this was changed. + case 'contains': + return $this->contains($node, $value); + // Since QP 2.1 + case 'contains-exactly': + return $this->containsExactly($node, $value); + default: + throw new \QueryPath\CSS\ParseException("Unknown Pseudo-Class: " . $name); + } + $this->findAnyElement = FALSE; + } + + /** + * Pseudo-class handler for :lang + * + * Note that this does not implement the spec in its entirety because we do + * not presume to "know the language" of the document. If anyone is interested + * in making this more intelligent, please do so. + */ + protected function lang($node, $value) { + // TODO: This checks for cases where an explicit language is + // set. The spec seems to indicate that an element should inherit + // language from the parent... but this is unclear. + $operator = (strpos($value, '-') !== FALSE) ? EventHandler::isExactly : EventHandler::containsWithHyphen; + + $match = TRUE; + foreach ($node->attributes as $attrNode) { + if ($attrNode->localName == 'lang') { + + if ($attrNode->nodeName == $attrNode->localName) { + // fprintf(STDOUT, "%s in NS %s\n", $attrNode->name, $attrNode->nodeName); + return Util::matchesAttribute($node, 'lang', $value, $operator); + } + else { + $nsuri = $attrNode->namespaceURI; + // fprintf(STDOUT, "%s in NS %s\n", $attrNode->name, $nsuri); + return Util::matchesAttributeNS($node, 'lang', $nsuri, $value, $operator); + } + + } + } + return FALSE; + } + + /** + * Provides jQuery pseudoclass ':header'. + */ + protected function header($node) { + return preg_match('/^h[1-9]$/i', $node->tagName) == 1; + } + + /** + * Provides pseudoclass :empty. + */ + protected function isEmpty($node) { + foreach ($node->childNodes as $kid) { + // We don't want to count PIs and comments. From the spec, it + // appears that CDATA is also not counted. + if ($kid->nodeType == XML_ELEMENT_NODE || $kid->nodeType == XML_TEXT_NODE) { + // As soon as we hit a FALSE, return. + return FALSE; + } + } + return TRUE; + } + + /** + * Provides jQuery pseudoclass :first. + * + * @todo + * This can be replaced by isNthChild(). + */ + protected function isFirst($node) { + while (isset($node->previousSibling)) { + $node = $node->previousSibling; + if ($node->nodeType == XML_ELEMENT_NODE) { + return FALSE; + } + } + return TRUE; + } + /** + * Fast version of first-of-type. + */ + protected function isFirstOfType($node) { + $type = $node->tagName; + while (isset($node->previousSibling)) { + $node = $node->previousSibling; + if ($node->nodeType == XML_ELEMENT_NODE && $node->tagName == $type) { + return FALSE; + } + } + return TRUE; + } + /** + * Fast version of jQuery :last. + */ + protected function isLast($node) { + while (isset($node->nextSibling)) { + $node = $node->nextSibling; + if ($node->nodeType == XML_ELEMENT_NODE) { + return FALSE; + } + } + return TRUE; + } + /** + * Provides last-of-type. + */ + protected function isLastOfType($node) { + $type = $node->tagName; + while (isset($node->nextSibling)) { + $node = $node->nextSibling; + if ($node->nodeType == XML_ELEMENT_NODE && $node->tagName == $type) { + return FALSE; + } + } + return TRUE; + } + /** + * Provides :contains() as the original spec called for. + * + * This is an INEXACT match. + */ + protected function contains($node, $value) { + $text = $node->textContent; + $value = Util::removeQuotes($value); + return isset($text) && (stripos($text, $value) !== FALSE); + } + /** + * Provides :contains-exactly QueryPath pseudoclass. + * + * This is an EXACT match. + */ + protected function containsExactly($node, $value) { + $text = $node->textContent; + $value = Util::removeQuotes($value); + return isset($text) && $text == $value; + } + + /** + * Provides :has pseudoclass. + */ + protected function has($node, $selector) { + $splos = new \SPLObjectStorage(); + $splos->attach($node); + $traverser = new \QueryPath\CSS\DOMTraverser($splos, TRUE); + $results = $traverser->find($selector)->matches(); + return count($results) > 0; + } + + /** + * Provides :not pseudoclass. + */ + protected function isNot($node, $selector) { + return !$this->has($node, $selector); + } + + /** + * Get the relative position of a node in its sibling set. + */ + protected function nodePositionFromStart($node, $byType = FALSE) { + $i = 1; + $tag = $node->tagName; + while (isset($node->previousSibling)) { + $node = $node->previousSibling; + if ($node->nodeType == XML_ELEMENT_NODE && (!$byType || $node->tagName == $tag)) { + ++$i; + } + } + return $i; + } + /** + * Get the relative position of a node in its sibling set. + */ + protected function nodePositionFromEnd($node, $byType = FALSE) { + $i = 1; + $tag = $node->tagName; + while (isset($node->nextSibling)) { + $node = $node->nextSibling; + if ($node->nodeType == XML_ELEMENT_NODE && (!$byType || $node->tagName == $tag)) { + ++$i; + } + } + return $i; + } + + /** + * Provides functionality for all "An+B" rules. + * Provides nth-child and also the functionality required for: + * + *- nth-last-child + *- even + *- odd + *- first + *- last + *- eq + *- nth + *- nth-of-type + *- first-of-type + *- last-of-type + *- nth-last-of-type + * + * See also QueryPath::CSS::DOMTraverser::Util::parseAnB(). + */ + protected function isNthChild($node, $value, $reverse = FALSE, $byType = FALSE) { + list($groupSize, $elementInGroup) = Util::parseAnB($value); + $parent = $node->parentNode; + if (empty($parent) + || ($groupSize == 0 && $elementInGroup == 0) + || ($groupSize > 0 && $elementInGroup > $groupSize) + ) { + return FALSE; + } + + // First we need to find the position of $node in other elements. + if ($reverse) { + $pos = $this->nodePositionFromEnd($node, $byType); + } + else { + $pos = $this->nodePositionFromStart($node, $byType); + } + + // If group size is 0, we just check to see if this + // is the nth element: + if ($groupSize == 0) { + return $pos == $elementInGroup; + } + + // Next, we normalize $elementInGroup + if ($elementInGroup < 0) { + $elementInGroup = $groupSize + $elementInGroup; + } + + + $prod = ($pos - $elementInGroup) / $groupSize; + // fprintf(STDOUT, "%d n + %d on %d is %3.5f\n", $groupSize, $elementInGroup, $pos, $prod); + + return is_int($prod) && $prod >= 0; + } + + protected function isLocalLink($node) { + if (!$node->hasAttribute('href')) { + return FALSE; + } + $url = $node->getAttribute('href'); + $scheme = parse_url($url, PHP_URL_SCHEME); + return empty($scheme) || $scheme == 'file'; + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php b/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php new file mode 100644 index 0000000..ec01d8f --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php @@ -0,0 +1,139 @@ +hasAttribute($name)) { + return FALSE; + } + + if (is_null($value)) { + return TRUE; + } + + return self::matchesAttributeValue($value, $node->getAttribute($name), $operation); + } + /** + * Check whether the given DOMElement has the given namespaced attribute. + */ + public static function matchesAttributeNS($node, $name, $nsuri, $value = NULL, $operation = EventHandler::isExactly) { + if (!$node->hasAttributeNS($nsuri, $name)) { + return FALSE; + } + + if (is_null($value)) { + return TRUE; + } + + return self::matchesAttributeValue($value, $node->getAttributeNS($nsuri, $name), $operation); + } + + /** + * Check for attr value matches based on an operation. + */ + public static function matchesAttributeValue($needle, $haystack, $operation) { + + if (strlen($haystack) < strlen($needle)) return FALSE; + + // According to the spec: + // "The case-sensitivity of attribute names in selectors depends on the document language." + // (6.3.2) + // To which I say, "huh?". We assume case sensitivity. + switch ($operation) { + case EventHandler::isExactly: + return $needle == $haystack; + case EventHandler::containsWithSpace: + // XXX: This needs testing! + return preg_match('/\b/', $haystack) == 1; + //return in_array($needle, explode(' ', $haystack)); + case EventHandler::containsWithHyphen: + return in_array($needle, explode('-', $haystack)); + case EventHandler::containsInString: + return strpos($haystack, $needle) !== FALSE; + case EventHandler::beginsWith: + return strpos($haystack, $needle) === 0; + case EventHandler::endsWith: + //return strrpos($haystack, $needle) === strlen($needle) - 1; + return preg_match('/' . $needle . '$/', $haystack) == 1; + } + return FALSE; // Shouldn't be able to get here. + } + + /** + * Remove leading and trailing quotes. + */ + public static function removeQuotes($str) { + $f = substr($str, 0, 1); + $l = substr($str, -1); + if ($f === $l && ($f == '"' || $f == "'")) { + $str = substr($str, 1, -1); + } + return $str; + } + + /** + * Parse an an+b rule for CSS pseudo-classes. + * + * Invalid rules return `array(0, 0)`. This is per the spec. + * + * @param $rule + * Some rule in the an+b format. + * @retval array + * `array($aVal, $bVal)` of the two values. + */ + public static function parseAnB($rule) { + if ($rule == 'even') { + return array(2, 0); + } + elseif ($rule == 'odd') { + return array(2, 1); + } + elseif ($rule == 'n') { + return array(1, 0); + } + elseif (is_numeric($rule)) { + return array(0, (int)$rule); + } + + $regex = '/^\s*([+\-]?[0-9]*)n\s*([+\-]?)\s*([0-9]*)\s*$/'; + $matches = array(); + $res = preg_match($regex, $rule, $matches); + + // If it doesn't parse, return 0, 0. + if (!$res) { + return array(0, 0); + } + + $aVal = isset($matches[1]) ? $matches[1] : 1; + if ($aVal == '-') { + $aVal = -1; + } + else { + $aVal = (int) $aVal; + } + + $bVal = 0; + if (isset($matches[3])) { + $bVal = (int) $matches[3]; + if (isset($matches[2]) && $matches[2] == '-') { + $bVal *= -1; + } + } + return array($aVal, $bVal); + } + +} -- cgit v1.2.3