diff options
Diffstat (limited to 'lib/querypath/src/QueryPath')
26 files changed, 9150 insertions, 0 deletions
diff --git a/lib/querypath/src/QueryPath/CSS/DOMTraverser.php b/lib/querypath/src/QueryPath/CSS/DOMTraverser.php new file mode 100644 index 0000000..be8c2af --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/DOMTraverser.php @@ -0,0 +1,775 @@ +<?php +/** @file + * Traverse a DOM. + */ + +namespace QueryPath\CSS; + +use \QueryPath\CSS\DOMTraverser\Util; +use \QueryPath\CSS\DOMTraverser\PseudoClass; +use \QueryPath\CSS\DOMTraverser\PseudoElement; + +/** + * Traverse a DOM, finding matches to the selector. + * + * This traverses a DOMDocument and attempts to find + * matches to the provided selector. + * + * \b How this works + * + * This performs a bottom-up search. On the first pass, + * it attempts to find all of the matching elements for the + * last simple selector in a selector. + * + * Subsequent passes attempt to eliminate matches from the + * initial matching set. + * + * Example: + * + * Say we begin with the selector `foo.bar baz`. This is processed + * as follows: + * + * - First, find all baz elements. + * - Next, for any baz element that does not have foo as an ancestor, + * eliminate it from the matches. + * - Finally, for those that have foo as an ancestor, does that foo + * also have a class bar? If not, it is removed from the matches. + * + * \b Extrapolation + * + * Partial simple selectors are almost always expanded to include an + * element. + * + * Examples: + * + * - `:first` is expanded to `*:first` + * - `.bar` is expanded to `*.bar`. + * - `.outer .inner` is expanded to `*.outer *.inner` + * + * The exception is that IDs are sometimes not expanded, e.g.: + * + * - `#myElement` does not get expanded + * - `#myElement .class` \i may be expanded to `*#myElement *.class` + * (which will obviously not perform well). 
*/
class DOMTraverser implements Traverser {

  protected $matches = array();
  protected $selector;
  protected $dom;
  protected $initialized = TRUE;
  protected $psHandler;
  protected $scopeNode;

  /**
   * Build a new DOMTraverser.
   *
   * This requires a DOM-like object or collection of DOM nodes.
   *
   * @param \SPLObjectStorage $splos
   *   The nodes to search. The first entry decides which document is
   *   used: a DOMDocument is used directly, any other node contributes
   *   its ownerDocument.
   * @param boolean $initialized
   *   If TRUE, find() tests the nodes in $splos themselves. If FALSE,
   *   find() first builds a candidate set via initialMatch().
   * @param object $scopeNode
   *   The node handed to pseudo-class handlers as the active scope.
   *   Defaults to the document element.
   */
  public function __construct(\SPLObjectStorage $splos, $initialized = FALSE, $scopeNode = NULL) {

    $this->psHandler = new \QueryPath\CSS\DOMTraverser\PseudoClass();
    $this->initialized = $initialized;

    // Re-use the initial splos
    $this->matches = $splos;

    // With an empty set there is nothing to derive a document from, so
    // $dom and $scopeNode stay unset.
    if (count($splos) != 0) {
      $splos->rewind();
      $first = $splos->current();
      $this->dom = ($first instanceof \DOMDocument) ? $first : $first->ownerDocument;
      $this->scopeNode = empty($scopeNode) ? $this->dom->documentElement : $scopeNode;
    }
  }

  /**
   * Write a debugging message to standard output.
   *
   * @param string $msg
   *   The message. It is prefixed with a newline.
   */
  public function debug($msg) {
    // The STDOUT constant is only defined by the CLI SAPI. Fall back to
    // the php://stdout stream elsewhere instead of failing on an
    // undefined constant.
    if (defined('STDOUT')) {
      fwrite(STDOUT, PHP_EOL . $msg);
    }
    else {
      file_put_contents('php://stdout', PHP_EOL . $msg);
    }
  }

  /**
   * Given a selector, find the matches in the given DOM.
   *
   * This is the main function for querying the DOM using a CSS
   * selector. The result replaces the internal match set; retrieve it
   * with matches().
   *
   * @param string $selector
   *   The selector.
   * @return \QueryPath\CSS\DOMTraverser
   *   This traverser, so that calls can be chained
   *   (e.g. `$traverser->find($sel)->matches()`).
   */
  public function find($selector) {
    // Parse the selector string into a Selector handler.
    $handler = new Selector();
    $parser = new Parser($selector, $handler);
    $parser->parse();
    $this->selector = $handler;

    $found = $this->newMatches();
    foreach ($handler as $selectorGroup) {
      // An initialized traverser tests its current matches directly;
      // otherwise a candidate set is built from the first simple
      // selector of the group.
      if ($this->initialized) {
        $candidates = $this->matches;
      }
      else {
        $candidates = $this->initialMatch($selectorGroup[0], $this->matches);
      }

      foreach ($candidates as $candidate) {
        if ($this->matchesSelector($candidate, $selectorGroup)) {
          $found->attach($candidate);
        }
      }
    }
    $this->setMatches($found);

    return $this;
  }

  /**
   * Get the current match set.
   *
   * @return \SPLObjectStorage
   *   The nodes matched by the last find(), or the initial set if
   *   find() has not been called.
   */
  public function matches() {
    return $this->matches;
  }

  /**
   * Check whether the given node matches the given selector.
   *
   * A selector is a group of one or more simple selectors combined
   * by combinators. This determines if a given selector
   * matches the given node.
   *
   * @attention
   * Evaluation of selectors is done recursively. Thus the length
   * of the selector is limited to the recursion depth allowed by
   * the PHP configuration. This should only cause problems for
   * absolutely huge selectors or for versions of PHP tuned to
   * strictly limit recursion depth.
   *
   * @param object DOMNode
   *   The DOMNode to check.
   * @param array Selector->toArray()
   *   The Selector to check.
   * @return boolean
   *   A boolean TRUE if the node matches, false otherwise.
   */
  public function matchesSelector($node, $selector) {
    // Evaluation starts at index 0 and walks the remaining simple
    // selectors through combine().
    return $this->matchesSimpleSelector($node, $selector, 0);
  }

  /**
   * Performs a match check on a SimpleSelector.
   *
   * Where matchesSelector() does a check on an entire selector,
   * this checks only a simple selector (plus an optional
   * combinator).
   *
   * @param object DOMNode
   *   The DOMNode to check.
   * @param object SimpleSelector
   *   The Selector to check.
   * @return boolean
   *   A boolean TRUE if the node matches, false otherwise.
*/
  public function matchesSimpleSelector($node, $selectors, $index) {
    $simple = $selectors[$index];

    // Every facet of the simple selector has to agree with the node.
    // The && chain stops at the first facet that fails.
    $matched = $this->matchElement($node, $simple->element, $simple->ns)
      && $this->matchAttributes($node, $simple->attributes)
      && $this->matchId($node, $simple->id)
      && $this->matchClasses($node, $simple->classes)
      && $this->matchPseudoClasses($node, $simple->pseudoClasses)
      && $this->matchPseudoElements($node, $simple->pseudoElements);

    if (!$matched) {
      return FALSE;
    }

    // When another simple selector follows, the overall answer is
    // decided by applying its combinator relative to this node.
    $next = $index + 1;
    if (isset($selectors[$next])) {
      return $this->combine($node, $selectors, $next);
    }

    return TRUE;
  }

  /**
   * Combine the next selector with the given match
   * using the next combinator.
   *
   * If the next selector is combined with another
   * selector, that will be evaluated too, and so on.
   * So if this function returns TRUE, it means that all
   * child selectors are also matches.
   *
   * @param DOMNode $node
   *   The DOMNode to test.
   * @param array $selectors
   *   The array of simple selectors.
   * @param int $index
   *   The index of the current selector.
   * @return boolean
   *   TRUE if the next selector(s) match.
*/
  public function combine($node, $selectors, $index) {
    $combinator = $selectors[$index]->combinator;

    // Dispatch on the combinator attached to the selector at $index.
    // The comparisons are loose and run in a fixed order.
    if ($combinator == SimpleSelector::adjacent) {
      return $this->combineAdjacent($node, $selectors, $index);
    }
    if ($combinator == SimpleSelector::sibling) {
      return $this->combineSibling($node, $selectors, $index);
    }
    if ($combinator == SimpleSelector::directDescendant) {
      return $this->combineDirectDescendant($node, $selectors, $index);
    }
    if ($combinator == SimpleSelector::anyDescendant) {
      return $this->combineAnyDescendant($node, $selectors, $index);
    }
    if ($combinator == SimpleSelector::anotherSelector) {
      // No tree movement: test the same node against the next selector.
      return $this->matchesSimpleSelector($node, $selectors, $index);
    }

    // Unrecognized combinator.
    return FALSE;
  }

  /**
   * Process an Adjacent Sibling.
   *
   * The spec does not indicate whether Adjacent should ignore non-Element
   * nodes, so we choose to ignore them.
   *
   * @param DOMNode $node
   *   A DOM Node.
   * @param array $selectors
   *   The selectors array.
   * @param int $index
   *   The current index to the operative simple selector in the selectors
   *   array.
   * @return boolean
   *   TRUE if the combination matches, FALSE otherwise.
   */
  public function combineAdjacent($node, $selectors, $index) {
    // Walk left past text, comments and other non-element nodes; the
    // first element found is the only candidate.
    for ($sibling = $node->previousSibling; !empty($sibling); $sibling = $sibling->previousSibling) {
      if ($sibling->nodeType == XML_ELEMENT_NODE) {
        return $this->matchesSimpleSelector($sibling, $selectors, $index);
      }
    }
    return FALSE;
  }

  /**
   * Check all siblings.
   *
   * According to the spec, this only tests elements LEFT of the provided
   * node.
   *
   * @param DOMNode $node
   *   A DOM Node.
   * @param array $selectors
   *   The selectors array.
   * @param int $index
   *   The current index to the operative simple selector in the selectors
   *   array.
   * @return boolean
   *   TRUE if the combination matches, FALSE otherwise.
*/
  public function combineSibling($node, $selectors, $index) {
    // Any preceding element sibling that satisfies the rest of the
    // selector is enough.
    while (!empty($node->previousSibling)) {
      $node = $node->previousSibling;
      if ($node->nodeType == XML_ELEMENT_NODE && $this->matchesSimpleSelector($node, $selectors, $index)) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /**
   * Handle a Direct Descendant combination.
   *
   * Check whether the given node is a rightly-related descendant
   * of its parent node.
   *
   * @param DOMNode $node
   *   A DOM Node.
   * @param array $selectors
   *   The selectors array.
   * @param int $index
   *   The current index to the operative simple selector in the selectors
   *   array.
   * @return boolean
   *   TRUE if the combination matches, FALSE otherwise.
   */
  public function combineDirectDescendant($node, $selectors, $index) {
    $parent = $node->parentNode;
    if (empty($parent)) {
      return FALSE;
    }
    return $this->matchesSimpleSelector($parent, $selectors, $index);
  }

  /**
   * Handle Any Descendant combinations.
   *
   * This checks to see if there are any matching routes from the
   * selector beginning at the present node.
   *
   * @param DOMNode $node
   *   A DOM Node.
   * @param array $selectors
   *   The selectors array.
   * @param int $index
   *   The current index to the operative simple selector in the selectors
   *   array.
   * @return boolean
   *   TRUE if the combination matches, FALSE otherwise.
   */
  public function combineAnyDescendant($node, $selectors, $index) {
    while (!empty($node->parentNode)) {
      $node = $node->parentNode;

      // Catch case where element is child of something
      // else. This should really only happen with a
      // document element.
      if ($node->nodeType != XML_ELEMENT_NODE) {
        continue;
      }

      if ($this->matchesSimpleSelector($node, $selectors, $index)) {
        return TRUE;
      }
    }

    // No ancestor matched. Return an explicit boolean, as documented,
    // rather than falling off the end and yielding NULL.
    return FALSE;
  }

  /**
   * Get the initial match set.
   *
   * This should only be executed when not working with
   * an existing match set.
*/
  protected function initialMatch($selector, $matches) {
    $element = $selector->element;

    // If no element is specified, we have to start with the
    // entire document.
    if ($element == NULL) {
      $element = '*';
    }

    // We try to do some optimization here to reduce the
    // number of matches to the bare minimum. This will
    // reduce the subsequent number of operations that
    // must be performed in the query.

    // Experimental: ID queries use XPath to match, since
    // this should give us only a single matched element
    // to work with.
    if (!empty($selector->id)) {
      return $this->initialMatchOnID($selector, $matches);
    }

    // If a namespace is set, find the namespace matches.
    if (!empty($selector->ns)) {
      return $this->initialMatchOnElementNS($selector, $matches);
    }

    // If the element is a wildcard, using class can
    // substantially reduce the number of elements that
    // we start with.
    if ($element == '*' && !empty($selector->classes)) {
      return $this->initialMatchOnClasses($selector, $matches);
    }

    return $this->initialMatchOnElement($selector, $matches);
  }

  /**
   * Shortcut for finding initial match by ID.
   *
   * Collects every node in $matches, and every descendant of those
   * nodes, whose `id` attribute equals the selector's ID. On success the
   * selector's `id` is cleared so that it is not tested a second time.
   *
   * @param object $selector
   *   The simple selector. Its `id` property is read and then set to NULL.
   * @param \Traversable $matches
   *   The nodes to search from.
   * @return \SplObjectStorage
   *   The nodes carrying the ID. Empty if no DOMDocument is available.
   */
  protected function initialMatchOnID($selector, $matches) {
    $id = $selector->id;
    $found = $this->newMatches();

    // Issue #145: DOMXPath will throw an exception if the DOM is
    // not set.
    if (!($this->dom instanceof \DOMDocument)) {
      return $found;
    }

    // Quote the ID as an XPath string literal. Interpolating it raw
    // produces a malformed expression as soon as it contains a quote.
    $baseQuery = './/*[@id=' . $this->xpathLiteral($id) . ']';
    $xpath = new \DOMXPath($this->dom);

    foreach ($matches as $node) {
      // The match set may hold a DOMDocument, which has no attributes.
      // Attribute values are strings, so compare them as strings.
      if ($node instanceof \DOMElement && $node->getAttribute('id') === (string) $id) {
        $found->attach($node);
      }

      // DOMXPath::query() returns FALSE on failure; only a real node
      // list can be attached.
      $nl = $this->initialXpathQuery($xpath, $node, $baseQuery);
      if ($nl instanceof \DOMNodeList) {
        $this->attachNodeList($nl, $found);
      }
    }

    // Unset the ID selector.
    $selector->id = NULL;
    return $found;
  }

  /**
   * Shortcut for setting the initial match.
   *
   * This shortcut should only be used when the initial
   * element is '*' and there are classes set.
   *
   * In any other case, the element finding algo is
   * faster and should be used instead.
   *
   * @param object $selector
   *   The simple selector. Its `classes` property is read and then
   *   reset to an empty array.
   * @param \Traversable $matches
   *   The nodes to search from.
   * @return \SplObjectStorage
   *   The nodes carrying all requested classes. Empty if no DOMDocument
   *   is available.
   */
  protected function initialMatchOnClasses($selector, $matches) {
    $found = $this->newMatches();

    // Issue #145: DOMXPath will throw an exception if the DOM is
    // not set.
    if (!($this->dom instanceof \DOMDocument)) {
      return $found;
    }
    $baseQuery = ".//*[@class]";
    $xpath = new \DOMXPath($this->dom);

    foreach ($matches as $node) {
      // The starting node itself may qualify.
      if ($this->hasAllClasses($node, $selector->classes)) {
        $found->attach($node);
      }

      // Then every descendant that carries a class attribute.
      $nl = $this->initialXpathQuery($xpath, $node, $baseQuery);
      if (!($nl instanceof \DOMNodeList)) {
        continue;
      }
      foreach ($nl as $candidate) {
        if ($this->hasAllClasses($candidate, $selector->classes)) {
          $found->attach($candidate);
        }
      }
    }

    // Unset the classes selector.
    $selector->classes = array();

    return $found;
  }

  /**
   * Test whether a node carries every class in the given list.
   *
   * The class attribute is split on any run of whitespace, the same way
   * matchClasses() splits it. This matters because the caller clears
   * the selector's classes, so nothing re-checks them later.
   */
  private function hasAllClasses($node, $classes) {
    if (!($node instanceof \DOMElement) || !$node->hasAttribute('class')) {
      return FALSE;
    }
    $present = preg_split('/\s+/', $node->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
    return count(array_diff($classes, $present)) == 0;
  }

  /**
   * Quote a value as an XPath 1.0 string literal.
   *
   * XPath 1.0 has no escape character, so a value containing both quote
   * styles is assembled with concat().
   */
  private function xpathLiteral($value) {
    $value = (string) $value;
    if (strpos($value, "'") === FALSE) {
      return "'" . $value . "'";
    }
    if (strpos($value, '"') === FALSE) {
      return '"' . $value . '"';
    }
    return "concat('" . str_replace("'", "',\"'\",'", $value) . "')";
  }

  /**
   * Internal xpath query.
   *
   * This is optimized for very specific use, and is not a general
   * purpose function.
*/
  private function initialXpathQuery($xpath, $node, $query) {
    // This works around a bug in which the document element
    // does not correctly search with the $baseQuery: the first
    // character of the query is dropped for that node.
    $isDocumentElement = $node->isSameNode($this->dom->documentElement);

    return $xpath->query($isDocumentElement ? substr($query, 1) : $query, $node);
  }

  /**
   * Shortcut for setting the initial match.
   *
   * Collects each starting node that qualifies, plus every descendant
   * returned by getElementsByTagName(). The selector's element is then
   * cleared.
   */
  protected function initialMatchOnElement($selector, $matches) {
    $element = is_null($selector->element) ? '*' : $selector->element;

    $found = $this->newMatches();
    foreach ($matches as $node) {
      // Capture the case where the initial element is the root element.
      if ($node->tagName == $element
          || ($element == '*' && $node->parentNode instanceof \DOMDocument)) {
        $found->attach($node);
      }
      $this->attachNodeList($node->getElementsByTagName($element), $found);
    }

    $selector->element = NULL;
    return $found;
  }

  /**
   * Get elements and filter by namespace.
   *
   * Matches by element name first, then drops every element whose
   * namespace URI differs from the one the prefix resolves to on that
   * element. The "any namespace" prefix (*) skips the filtering.
   */
  protected function initialMatchOnElementNS($selector, $matches) {
    $ns = $selector->ns;

    $elements = $this->initialMatchOnElement($selector, $matches);

    // "any namespace" matches anything.
    if ($ns == '*') {
      return $elements;
    }

    // Gather the rejects first and remove them afterwards, because
    // ObjectStorage iterates wrongly when an item is detached in an
    // access loop.
    $rejected = new \SplObjectStorage();
    foreach ($elements as $node) {
      // This lookup must be done PER NODE.
      $nsuri = $node->lookupNamespaceURI($ns);
      if (empty($nsuri) || $node->namespaceURI != $nsuri) {
        $rejected->attach($node);
      }
    }
    $elements->removeAll($rejected);

    $selector->ns = NULL;
    return $elements;
  }

  /**
   * Checks to see if the DOMNode matches the given element selector.
*
   * This handles the following cases:
   *
   * - element (foo)
   * - namespaced element (ns|foo)
   * - namespaced wildcard (ns|*)
   * - wildcard (* or *|*)
   *
   * An empty $element matches any node.
   */
  protected function matchElement($node, $element, $ns = NULL) {
    if (empty($element)) {
      return TRUE;
    }

    // Handle namespace.
    if (!empty($ns) && $ns != '*') {
      // Check whether we have a matching NS URI.
      $nsuri = $node->lookupNamespaceURI($ns);
      if (empty($nsuri) || $node->namespaceURI !== $nsuri) {
        return FALSE;
      }
    }

    // Compare local name to given element name.
    return $element == '*' || $node->localName == $element;
  }

  /**
   * Get a list of ancestors to the present node.
   *
   * @return array
   *   The parent chain, nearest parent first.
   */
  protected function ancestors($node) {
    $buffer = array();
    $parent = $node;
    while (($parent = $parent->parentNode) !== NULL) {
      $buffer[] = $parent;
    }
    return $buffer;
  }

  /**
   * Check to see if DOMNode has all of the given attributes.
   *
   * This can handle namespaced attributes, including namespace
   * wildcards.
   *
   * @param DOMNode $node
   *   The node to test.
   * @param array $attributes
   *   Attribute selectors. Each entry is read for the keys `name` and
   *   `op`, and optionally `value` and `ns`.
   * @return boolean
   *   TRUE if every attribute selector matches, FALSE otherwise.
   */
  protected function matchAttributes($node, $attributes) {
    if (empty($attributes)) {
      return TRUE;
    }

    foreach ($attributes as $attr) {
      $val = isset($attr['value']) ? $attr['value'] : NULL;

      // Namespaced attributes.
      if (isset($attr['ns']) && $attr['ns'] != '*') {
        $nsuri = $node->lookupNamespaceURI($attr['ns']);
        if (empty($nsuri) || !$node->hasAttributeNS($nsuri, $attr['name'])) {
          return FALSE;
        }
        $matches = Util::matchesAttributeNS($node, $attr['name'], $nsuri, $val, $attr['op']);
      }
      elseif (isset($attr['ns']) && $attr['ns'] == '*' && $node->hasAttributes()) {
        // Cycle through all of the attributes in the node. Note that
        // these are DOMAttr objects. One match in any namespace is
        // enough, so stop at the first hit; otherwise a later attribute
        // with the same local name would overwrite a successful match.
        $matches = FALSE;
        $name = $attr['name'];
        foreach ($node->attributes as $attrNode) {
          if ($attrNode->localName == $name
              && Util::matchesAttributeNS($node, $name, $attrNode->namespaceURI, $val, $attr['op'])) {
            $matches = TRUE;
            break;
          }
        }
      }
      // No namespace.
      else {
        $matches = Util::matchesAttribute($node, $attr['name'], $val, $attr['op']);
      }

      if (!$matches) {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * Check that the given DOMNode has the given ID.
   *
   * An empty $id matches any node.
   */
  protected function matchId($node, $id) {
    if (empty($id)) {
      return TRUE;
    }
    // Compare as strings: a loose comparison treats numeric-looking IDs
    // such as "1e1" and "10" as equal.
    return $node->hasAttribute('id') && $node->getAttribute('id') === (string) $id;
  }

  /**
   * Check that the given DOMNode has all of the given classes.
   *
   * An empty $classes list matches any node.
   */
  protected function matchClasses($node, $classes) {
    if (empty($classes)) {
      return TRUE;
    }

    if (!$node->hasAttribute('class')) {
      return FALSE;
    }

    $eleClasses = preg_split('/\s+/', $node->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
    if (empty($eleClasses)) {
      return FALSE;
    }

    // Every requested class must be present on the element.
    return count(array_diff($classes, $eleClasses)) == 0;
  }

  /**
   * Check that the given DOMNode satisfies all of the pseudo-classes.
   *
   * @param DOMNode $node
   *   The node to test.
   * @param array $pseudoClasses
   *   Entries with a `name` key and an optional `value` key.
   * @return boolean
   *   TRUE if every pseudo-class matches (or none were given).
   */
  protected function matchPseudoClasses($node, $pseudoClasses) {
    $ret = TRUE;
    foreach ($pseudoClasses as $pseudoClass) {
      $name = $pseudoClass['name'];
      // Avoid E_STRICT violation.
      $value = isset($pseudoClass['value']) ? $pseudoClass['value'] : NULL;
      // The handler is called first, so every pseudo-class is still
      // evaluated (and can still throw) after an earlier one failed.
      // Using && keeps the result a real boolean instead of the integer
      // that `&=` produces.
      $ret = $this->psHandler->elementMatches($name, $node, $this->scopeNode, $value) && $ret;
    }
    return $ret;
  }

  /**
   * Test whether the given node matches the pseudoElements.
   *
   * If any pseudo-elements are passed, this will test to see
   * <i>if conditions obtain that would allow the pseudo-element
   * to be created</i>. This does not modify the match in any way.
*/
  protected function matchPseudoElements($node, $pseudoElements) {
    if (empty($pseudoElements)) {
      return TRUE;
    }

    foreach ($pseudoElements as $pse) {
      switch ($pse) {
        case 'first-line':
        case 'first-letter':
        case 'before':
        case 'after':
          // These can only exist where there is text content.
          return strlen($node->textContent) > 0;
        case 'selection':
          // Report the pseudo-element actually being processed.
          throw new \QueryPath\CSS\NotImplementedException("::$pse is not implemented.");
      }
    }

    // Only unrecognized pseudo-elements were supplied: no match. This
    // is an explicit boolean instead of the implicit NULL.
    return FALSE;
  }

  /**
   * Create a new, empty match set.
   */
  protected function newMatches() {
    return new \SplObjectStorage();
  }

  /**
   * Get the internal match set.
   * Internal utility function.
   */
  protected function getMatches() {
    return $this->matches();
  }

  /**
   * Set the internal match set.
   *
   * Internal utility function.
   */
  protected function setMatches($matches) {
    $this->matches = $matches;
  }

  /**
   * Attach all nodes in a node list to the given \SplObjectStorage.
   */
  public function attachNodeList(\DOMNodeList $nodeList, \SplObjectStorage $splos) {
    foreach ($nodeList as $item) {
      $splos->attach($item);
    }
  }

  /**
   * Get the document this traverser operates on.
   *
   * This is unset when the traverser was built from an empty set.
   */
  public function getDocument() {
    return $this->dom;
  }

}
diff --git a/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php b/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php
new file mode 100644
index 0000000..0bcaf79
--- /dev/null
+++ b/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php
@@ -0,0 +1,421 @@
<?php
/**
 * @file
 *
 * PseudoClass class.
 *
 * This is the first pass in an experiment to break PseudoClass handling
 * out of the normal traversal. Eventually, this should become a
 * top-level pluggable registry that will allow custom pseudoclasses.
 * For now, though, we just handle the core pseudoclasses.
 */
namespace QueryPath\CSS\DOMTraverser;

use \QueryPath\CSS\NotImplementedException;
use \QueryPath\CSS\EventHandler;
/**
 * The PseudoClass handler.
 *
 */
class PseudoClass {

  /**
   * Tests whether the given element matches the given pseudoclass.
*
   * @param string $pseudoclass
   *   The string name of the pseudoclass
   * @param resource $node
   *   The DOMNode to be tested.
   * @param resource $scope
   *   The DOMElement that is the active root for this node.
   * @param mixed $value
   *   The optional value string provided with this class. This is
   *   used, for example, in an+b pseudoclasses.
   * @retval boolean
   *   TRUE if the node matches, FALSE otherwise.
   * @throws \QueryPath\CSS\NotImplementedException
   *   If :lang() is used without a value.
   * @throws \QueryPath\CSS\ParseException
   *   If :not() is used without a value, or the pseudo-class is unknown.
   */
  public function elementMatches($pseudoclass, $node, $scope, $value = NULL) {
    $name = strtolower($pseudoclass);
    // Need to handle known pseudoclasses.
    switch ($name) {
      case 'current':
      case 'past':
      case 'future':
      case 'visited':
      case 'hover':
      case 'active':
      case 'focus':
      case 'animated': // Last 3 are from jQuery
      case 'visible':
      case 'hidden':
        // These require a UA, which we don't have.
      case 'valid':
      case 'invalid':
      case 'required':
      case 'optional':
      case 'read-only':
      case 'read-write':
        // Since we don't know how to validate elements,
        // we can't supply these.
      case 'dir':
        // FIXME: I don't know how to get directionality info.
      case 'nth-column':
      case 'nth-last-column':
        // We don't know what a column is in most documents.
        // FIXME: Can we do this for HTML?
      case 'target':
        // This requires a location URL, which we don't have.
      case 'indeterminate':
        // Nothing here can determine this state, so it never matches.
        // A random answer would make queries non-reproducible.
        return FALSE;

      case 'lang':
        // No value = exception.
        if (!isset($value)) {
          throw new NotImplementedException(":lang() requires a value.");
        }
        return $this->lang($node, $value);
      case 'any-link':
        return Util::matchesAttribute($node, 'href')
          || Util::matchesAttribute($node, 'src')
          || Util::matchesAttribute($node, 'link');
      case 'link':
        return Util::matchesAttribute($node, 'href');
      case 'local-link':
        return $this->isLocalLink($node);
      case 'root':
        return $node->isSameNode($node->ownerDocument->documentElement);

      // CSS 4 declares the :scope pseudo-class, which describes what was
      // the :x-root QueryPath extension.
      case 'x-root':
      case 'x-reset':
      case 'scope':
        return $node->isSameNode($scope);

      // NON-STANDARD extensions for simple support of even and odd. These
      // are supported by jQuery, FF, and other user agents.
      case 'even':
        return $this->isNthChild($node, 'even');
      case 'odd':
        return $this->isNthChild($node, 'odd');
      case 'nth-child':
        return $this->isNthChild($node, $value);
      case 'nth-last-child':
        return $this->isNthChild($node, $value, TRUE);
      case 'nth-of-type':
        return $this->isNthChild($node, $value, FALSE, TRUE);
      case 'nth-last-of-type':
        return $this->isNthChild($node, $value, TRUE, TRUE);
      case 'first-of-type':
        return $this->isFirstOfType($node);
      case 'last-of-type':
        return $this->isLastOfType($node);
      case 'only-of-type':
        return $this->isFirstOfType($node) && $this->isLastOfType($node);

      // Additional pseudo-classes defined in jQuery:
      case 'lt':
        // I'm treating this as "less than or equal to".
        $rule = sprintf('-n + %d', (int) $value);
        return $this->isNthChild($node, $rule);
      case 'gt':
        // I'm treating this as "greater than"
        return $this->nodePositionFromStart($node) > (int) $value;
      case 'nth':
      case 'eq':
        $rule = (int) $value;
        return $this->isNthChild($node, $rule);
      case 'first':
        return $this->isNthChild($node, 1);
      case 'first-child':
        return $this->isFirst($node);
      case 'last':
      case 'last-child':
        return $this->isLast($node);
      case 'only-child':
        return $this->isFirst($node) && $this->isLast($node);
      case 'empty':
        return $this->isEmpty($node);
      case 'parent':
        return !$this->isEmpty($node);

      // These match on the presence of an attribute with the same name.
      case 'enabled':
      case 'disabled':
      case 'checked':
        return Util::matchesAttribute($node, $name);

      // These match on the value of the `type` attribute.
      case 'text':
      case 'radio':
      case 'checkbox':
      case 'file':
      case 'password':
      case 'submit':
      case 'image':
      case 'reset':
      case 'button':
        return Util::matchesAttribute($node, 'type', $name);

      case 'header':
        return $this->header($node);
      case 'has':
      case 'matches':
        return $this->has($node, $value);
      case 'not':
        if (empty($value)) {
          // Fully qualified: an unqualified ParseException would resolve
          // inside this namespace, where no such class is declared.
          throw new \QueryPath\CSS\ParseException(":not() requires a value.");
        }
        return $this->isNot($node, $value);

      // Contains == text matches.
      // In QP 2.1, this was changed.
      case 'contains':
        return $this->contains($node, $value);
      // Since QP 2.1
      case 'contains-exactly':
        return $this->containsExactly($node, $value);
      default:
        throw new \QueryPath\CSS\ParseException("Unknown Pseudo-Class: " . $name);
    }
  }

  /**
   * Pseudo-class handler for :lang
   *
   * Note that this does not implement the spec in its entirety because we do
   * not presume to "know the language" of the document. If anyone is interested
   * in making this more intelligent, please do so.
*/
  protected function lang($node, $value) {
    // TODO: This checks for cases where an explicit language is
    // set. The spec seems to indicate that an element should inherit
    // language from the parent... but this is unclear.

    // A value that already contains a hyphen is compared exactly;
    // otherwise the hyphen-aware operator is used.
    if (strpos($value, '-') !== FALSE) {
      $operator = EventHandler::isExactly;
    }
    else {
      $operator = EventHandler::containsWithHyphen;
    }

    // Only the first attribute whose local name is "lang" is consulted.
    foreach ($node->attributes as $attrNode) {
      if ($attrNode->localName != 'lang') {
        continue;
      }

      // Same node name and local name means the attribute has no prefix.
      if ($attrNode->nodeName == $attrNode->localName) {
        return Util::matchesAttribute($node, 'lang', $value, $operator);
      }

      return Util::matchesAttributeNS($node, 'lang', $attrNode->namespaceURI, $value, $operator);
    }

    return FALSE;
  }

  /**
   * Provides jQuery pseudoclass ':header'.
   *
   * Matches tag names consisting of "h" followed by one digit from 1 to
   * 9, case-insensitively.
   */
  protected function header($node) {
    return (bool) preg_match('/^h[1-9]$/i', $node->tagName);
  }

  /**
   * Provides pseudoclass :empty.
   *
   * A node counts as empty when it has neither element nor text children.
   */
  protected function isEmpty($node) {
    // We don't want to count PIs and comments. From the spec, it
    // appears that CDATA is also not counted.
    $significant = array(XML_ELEMENT_NODE, XML_TEXT_NODE);
    foreach ($node->childNodes as $kid) {
      if (in_array($kid->nodeType, $significant)) {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * Provides jQuery pseudoclass :first.
   *
   * TRUE when no element sibling precedes the node.
   *
   * @todo
   *   This can be replaced by isNthChild().
   */
  protected function isFirst($node) {
    for ($sibling = $node->previousSibling; isset($sibling); $sibling = $sibling->previousSibling) {
      if ($sibling->nodeType == XML_ELEMENT_NODE) {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * Fast version of first-of-type.
*/
  protected function isFirstOfType($node) {
    $type = $node->tagName;
    // TRUE unless an earlier element sibling shares the tag name.
    for ($sibling = $node->previousSibling; isset($sibling); $sibling = $sibling->previousSibling) {
      if ($sibling->nodeType == XML_ELEMENT_NODE && $sibling->tagName == $type) {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * Fast version of jQuery :last.
   *
   * TRUE when no element sibling follows the node.
   */
  protected function isLast($node) {
    for ($sibling = $node->nextSibling; isset($sibling); $sibling = $sibling->nextSibling) {
      if ($sibling->nodeType == XML_ELEMENT_NODE) {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * Provides last-of-type.
   *
   * TRUE unless a later element sibling shares the tag name.
   */
  protected function isLastOfType($node) {
    $type = $node->tagName;
    for ($sibling = $node->nextSibling; isset($sibling); $sibling = $sibling->nextSibling) {
      if ($sibling->nodeType == XML_ELEMENT_NODE && $sibling->tagName == $type) {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * Provides :contains() as the original spec called for.
   *
   * This is an INEXACT match: a case-insensitive substring search of
   * the node's text content, after surrounding quotes are stripped
   * from the value.
   */
  protected function contains($node, $value) {
    $needle = Util::removeQuotes($value);
    $text = $node->textContent;
    if (!isset($text)) {
      return FALSE;
    }
    return stripos($text, $needle) !== FALSE;
  }

  /**
   * Provides :contains-exactly QueryPath pseudoclass.
   *
   * This is an EXACT match: the whole text content must equal the
   * value, after surrounding quotes are stripped from the value.
   */
  protected function containsExactly($node, $value) {
    $expected = Util::removeQuotes($value);
    $text = $node->textContent;
    if (!isset($text)) {
      return FALSE;
    }
    return $text == $expected;
  }

  /**
   * Provides :has pseudoclass.
   *
   * Runs the selector through a new DOMTraverser seeded with only this
   * node and marked as already initialized.
   *
   * NOTE(review): an initialized traverser tests its seed nodes
   * themselves, so this appears to answer "does this node match
   * $selector" rather than "does it contain a match" -- confirm this is
   * the intended meaning for :has.
   */
  protected function has($node, $selector) {
    $seed = new \SPLObjectStorage();
    $seed->attach($node);

    $traverser = new \QueryPath\CSS\DOMTraverser($seed, TRUE);
    $traverser->find($selector);

    return count($traverser->matches()) > 0;
  }

  /**
   * Provides :not pseudoclass.
   *
   * The negation of has().
   */
  protected function isNot($node, $selector) {
    return !$this->has($node, $selector);
  }

  /**
   * Get the relative position of a node in its sibling set.
*/
  protected function nodePositionFromStart($node, $byType = FALSE) {
    // Positions are 1-based: the node itself counts as one.
    $i = 1;
    $tag = $node->tagName;
    while (isset($node->previousSibling)) {
      $node = $node->previousSibling;
      if ($node->nodeType == XML_ELEMENT_NODE && (!$byType || $node->tagName == $tag)) {
        ++$i;
      }
    }
    return $i;
  }

  /**
   * Get the relative position of a node in its sibling set.
   *
   * Like nodePositionFromStart(), but counted from the last sibling.
   */
  protected function nodePositionFromEnd($node, $byType = FALSE) {
    $i = 1;
    $tag = $node->tagName;
    while (isset($node->nextSibling)) {
      $node = $node->nextSibling;
      if ($node->nodeType == XML_ELEMENT_NODE && (!$byType || $node->tagName == $tag)) {
        ++$i;
      }
    }
    return $i;
  }

  /**
   * Provides functionality for all "An+B" rules.
   * Provides nth-child and also the functionality required for:
   *
   *- nth-last-child
   *- even
   *- odd
   *- first
   *- last
   *- eq
   *- nth
   *- nth-of-type
   *- first-of-type
   *- last-of-type
   *- nth-last-of-type
   *
   * See also QueryPath::CSS::DOMTraverser::Util::parseAnB().
   *
   * @param DOMNode $node
   *   The node to test.
   * @param mixed $value
   *   The An+B rule, passed to Util::parseAnB().
   * @param boolean $reverse
   *   Count positions from the end of the sibling set.
   * @param boolean $byType
   *   Count only siblings with the same tag name.
   * @return boolean
   *   TRUE if the node's 1-based position equals A*n + B for some
   *   integer n >= 0.
   */
  protected function isNthChild($node, $value, $reverse = FALSE, $byType = FALSE) {
    list($groupSize, $elementInGroup) = Util::parseAnB($value);
    $groupSize = (int) $groupSize;
    $elementInGroup = (int) $elementInGroup;

    // A parentless node has no sibling position. array(0, 0) is what
    // parseAnB() is documented to return for an invalid rule.
    $parent = $node->parentNode;
    if (empty($parent) || ($groupSize == 0 && $elementInGroup == 0)) {
      return FALSE;
    }

    // First we need to find the position of $node in other elements.
    if ($reverse) {
      $pos = $this->nodePositionFromEnd($node, $byType);
    }
    else {
      $pos = $this->nodePositionFromStart($node, $byType);
    }

    // If group size is 0, we just check to see if this
    // is the nth element:
    if ($groupSize == 0) {
      return $pos == $elementInGroup;
    }

    // Solve pos = A*n + B for n. The node matches when n is a whole,
    // non-negative number. B may be larger than A ("n+2", "2n+3") or
    // negative ("3n-1"), and A may be negative ("-n+3").
    $offset = $pos - $elementInGroup;
    return ($offset % $groupSize) == 0 && ($offset / $groupSize) >= 0;
  }

  /**
   * Provides :local-link.
   *
   * A link is treated as local when its href has no scheme, or the
   * scheme is "file". Nodes without an href never match.
   */
  protected function isLocalLink($node) {
    if (!$node->hasAttribute('href')) {
      return FALSE;
    }
    $url = $node->getAttribute('href');
    $scheme = parse_url($url, PHP_URL_SCHEME);
    return empty($scheme) || $scheme == 'file';
  }

}
diff --git a/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php b/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php
new file mode 100644
index 0000000..ec01d8f
--- /dev/null
+++ b/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php
@@ -0,0 +1,139 @@
<?php
/**
 * @file
 *
 * Utilities for DOM traversal.
 */
namespace QueryPath\CSS\DOMTraverser;

use \QueryPath\CSS\EventHandler;

/**
 * Utilities for DOM Traversal.
 */
class Util {

  /**
   * Check whether the given DOMElement has the given attribute.
   *
   * With a NULL $value only the presence of the attribute is tested;
   * otherwise its value is compared using $operation.
   */
  public static function matchesAttribute($node, $name, $value = NULL, $operation = EventHandler::isExactly) {
    if (!$node->hasAttribute($name)) {
      return FALSE;
    }

    if (is_null($value)) {
      return TRUE;
    }

    return self::matchesAttributeValue($value, $node->getAttribute($name), $operation);
  }

  /**
   * Check whether the given DOMElement has the given namespaced attribute.
+ */ + public static function matchesAttributeNS($node, $name, $nsuri, $value = NULL, $operation = EventHandler::isExactly) { + if (!$node->hasAttributeNS($nsuri, $name)) { + return FALSE; + } + + if (is_null($value)) { + return TRUE; + } + + return self::matchesAttributeValue($value, $node->getAttributeNS($nsuri, $name), $operation); + } + + /** + * Check for attr value matches based on an operation. + */ + public static function matchesAttributeValue($needle, $haystack, $operation) { + + if (strlen($haystack) < strlen($needle)) return FALSE; + + // According to the spec: + // "The case-sensitivity of attribute names in selectors depends on the document language." + // (6.3.2) + // To which I say, "huh?". We assume case sensitivity. + switch ($operation) { + case EventHandler::isExactly: + return $needle == $haystack; + case EventHandler::containsWithSpace: + // XXX: This needs testing! + return preg_match('/\b/', $haystack) == 1; + //return in_array($needle, explode(' ', $haystack)); + case EventHandler::containsWithHyphen: + return in_array($needle, explode('-', $haystack)); + case EventHandler::containsInString: + return strpos($haystack, $needle) !== FALSE; + case EventHandler::beginsWith: + return strpos($haystack, $needle) === 0; + case EventHandler::endsWith: + //return strrpos($haystack, $needle) === strlen($needle) - 1; + return preg_match('/' . $needle . '$/', $haystack) == 1; + } + return FALSE; // Shouldn't be able to get here. + } + + /** + * Remove leading and trailing quotes. + */ + public static function removeQuotes($str) { + $f = substr($str, 0, 1); + $l = substr($str, -1); + if ($f === $l && ($f == '"' || $f == "'")) { + $str = substr($str, 1, -1); + } + return $str; + } + + /** + * Parse an an+b rule for CSS pseudo-classes. + * + * Invalid rules return `array(0, 0)`. This is per the spec. + * + * @param $rule + * Some rule in the an+b format. + * @retval array + * `array($aVal, $bVal)` of the two values. 
+ */ + public static function parseAnB($rule) { + if ($rule == 'even') { + return array(2, 0); + } + elseif ($rule == 'odd') { + return array(2, 1); + } + elseif ($rule == 'n') { + return array(1, 0); + } + elseif (is_numeric($rule)) { + return array(0, (int)$rule); + } + + $regex = '/^\s*([+\-]?[0-9]*)n\s*([+\-]?)\s*([0-9]*)\s*$/'; + $matches = array(); + $res = preg_match($regex, $rule, $matches); + + // If it doesn't parse, return 0, 0. + if (!$res) { + return array(0, 0); + } + + $aVal = isset($matches[1]) ? $matches[1] : 1; + if ($aVal == '-') { + $aVal = -1; + } + else { + $aVal = (int) $aVal; + } + + $bVal = 0; + if (isset($matches[3])) { + $bVal = (int) $matches[3]; + if (isset($matches[2]) && $matches[2] == '-') { + $bVal *= -1; + } + } + return array($aVal, $bVal); + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/EventHandler.php b/lib/querypath/src/QueryPath/CSS/EventHandler.php new file mode 100644 index 0000000..a003a0a --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/EventHandler.php @@ -0,0 +1,171 @@ +<?php +/** @file + * CSS selector parsing classes. + * + * This file contains the tools necessary for parsing CSS 3 selectors. + * In the future it may be expanded to handle all of CSS 3. + * + * The parser contained herein is has an event-based API. Implementors should + * begin by implementing the {@link EventHandler} interface. For an example + * of how this is done, see {@link EventHandler.php}. + * + * @author M Butcher <matt@aleph-null.tv> + * @license MIT + */ +namespace QueryPath\CSS; + +/** @addtogroup querypath_css CSS Parsing + * QueryPath includes a CSS 3 Selector parser. + * + * + * Typically the parser is not accessed directly. Most developers will use it indirectly from + * qp(), htmlqp(), or one of the methods on a QueryPath object. + * + * This parser is modular and is not tied to QueryPath, so you can use it in your + * own (non-QueryPath) projects if you wish. 
 * To dive in, start with EventHandler, the
 * event interface that works like a SAX API for CSS selectors. If you want to check out
 * the details, check out the parser (QueryPath::CSS::Parser), scanner
 * (QueryPath::CSS::Scanner), and token list (QueryPath::CSS::Token).
 */

/**
 * An event handler for handling CSS 3 Selector parsing.
 *
 * This provides a standard interface for CSS 3 Selector event handling. As the
 * parser parses a selector, it will fire events. Implementations of EventHandler
 * can then handle the events.
 *
 * This library is inspired by the SAX2 API for parsing XML. Each component of a
 * selector fires an event, passing the necessary data on to the event handler.
 *
 * @ingroup querypath_css
 */
interface EventHandler {
  /** The is-exactly (=) operator. */
  const isExactly = 0; // =
  /** The contains-with-space operator (~=). */
  const containsWithSpace = 1; // ~=
  /** The contains-with-hyphen operator (|=). */
  const containsWithHyphen = 2; // |=
  /** The contains-in-string operator (*=). */
  const containsInString = 3; // *=
  /** The begins-with operator (^=). */
  const beginsWith = 4; // ^=
  /** The ends-with operator ($=). */
  const endsWith = 5; // $=
  /** The any-element operator (*). */
  const anyElement = '*';

  /**
   * This event is fired when a CSS ID is encountered.
   * An ID begins with an octothorp: #name.
   *
   * @param string $id
   *   The ID passed in.
   */
  public function elementID($id); // #name
  /**
   * Handle an element name.
   * Example: name
   * @param string $name
   *   The name of the element.
   */
  public function element($name); // name
  /**
   * Handle a namespaced element name.
   * Example: namespace|name
   * @param string $name
   *   The tag name.
   * @param string $namespace
   *   The namespace identifier (not the URI).
   */
  public function elementNS($name, $namespace = NULL);
  /**
   * Handle an any-element (*) operator.
   * Example: *
   */
  public function anyElement(); // *
  /**
   * Handle an any-element operator that is constrained to a namespace.
   * Example: ns|*
   * @param string $ns
   *   The namespace identifier (not the URI).
   */
  public function anyElementInNS($ns); // ns|*
  /**
   * Handle a CSS class selector.
   * Example: .name
   * @param string $name
   *   The name of the class.
   */
  public function elementClass($name); // .name
  /**
   * Handle an attribute selector.
   * Example: [name=attr]
   * Example: [name~=attr]
   * @param string $name
   *   The attribute name.
   * @param string $value
   *   The value of the attribute, if given.
   * @param int $operation
   *   The operation to be used for matching. See {@link EventHandler}
   *   constants for a list of supported operations.
   */
  public function attribute($name, $value = NULL, $operation = EventHandler::isExactly); // [name=attr]
  /**
   * Handle an attribute selector bound to a specific namespace.
   * Example: [ns|name=attr]
   * Example: [ns|name~=attr]
   * @param string $name
   *   The attribute name.
   * @param string $ns
   *   The namespace identifier (not the URI).
   * @param string $value
   *   The value of the attribute, if given.
   * @param int $operation
   *   The operation to be used for matching. See {@link EventHandler}
   *   constants for a list of supported operations.
   */
  public function attributeNS($name, $ns, $value = NULL, $operation = EventHandler::isExactly);
  /**
   * Handle a pseudo-class.
   * Example: :name(value)
   * @param string $name
   *   The pseudo-class name.
   * @param string $value
   *   The value, if one is found.
   */
  public function pseudoClass($name, $value = NULL); //:name(value)
  /**
   * Handle a pseudo-element.
   * Example: ::name
   * @param string $name
   *   The pseudo-element name.
   */
  public function pseudoElement($name); // ::name
  /**
   * Handle a direct descendant combinator.
   * Example: >
   */
  public function directDescendant(); // >
  /**
   * Handle an adjacent combinator.
+ * Example: + + */ + public function adjacent(); // + + /** + * Handle an another-selector combinator. + * Example: , + */ + public function anotherSelector(); // , + /** + * Handle a sibling combinator. + * Example: ~ + */ + public function sibling(); // ~ combinator + /** + * Handle an any-descendant combinator. + * Example: ' ' + */ + public function anyDescendant(); // ' ' (space) operator. +} diff --git a/lib/querypath/src/QueryPath/CSS/InputStream.php b/lib/querypath/src/QueryPath/CSS/InputStream.php new file mode 100644 index 0000000..2967845 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/InputStream.php @@ -0,0 +1,57 @@ +<?php +/** + * @file + * + * The CSS Input Stream abstraction. + */ + +namespace QueryPath\CSS; + +/** + * Simple wrapper to turn a string into an input stream. + * This provides a standard interface on top of an array of + * characters. + */ +class InputStream { + protected $stream = NULL; + public $position = 0; + /** + * Build a new CSS input stream from a string. + * + * @param string + * String to turn into an input stream. + */ + function __construct($string) { + $this->stream = str_split($string); + } + /** + * Look ahead one character. + * + * @return char + * Returns the next character, but does not remove it from + * the stream. + */ + function peek() { + return $this->stream[0]; + } + /** + * Get the next unconsumed character in the stream. + * This will remove that character from the front of the + * stream and return it. + */ + function consume() { + $ret = array_shift($this->stream); + if (!empty($ret)) { + $this->position++; + } + return $ret; + } + /** + * Check if the stream is empty. + * @return boolean + * Returns TRUE when the stream is empty, FALSE otherwise. 
+ */ + function isEmpty() { + return count($this->stream) == 0; + } +} diff --git a/lib/querypath/src/QueryPath/CSS/NotImplementedException.php b/lib/querypath/src/QueryPath/CSS/NotImplementedException.php new file mode 100644 index 0000000..6705f30 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/NotImplementedException.php @@ -0,0 +1,15 @@ +<?php +/** + * @file + * An exception for CSS errors. + */ +namespace QueryPath\CSS; +/** + * Exception thrown for unimplemented CSS. + * + * This is thrown in cases where some feature is expected, but the current + * implementation does not support that feature. + * + * @ingroup querypath_css + */ +class NotImplementedException extends \Exception {} diff --git a/lib/querypath/src/QueryPath/CSS/ParseException.php b/lib/querypath/src/QueryPath/CSS/ParseException.php new file mode 100644 index 0000000..957857a --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/ParseException.php @@ -0,0 +1,15 @@ +<?php +/** + * @file + * + * The CSS parsing exception class. + */ + +namespace QueryPath\CSS; + +/** + * Exception indicating an error in CSS parsing. + * + * @ingroup querypath_css + */ +class ParseException extends \QueryPath\Exception {} diff --git a/lib/querypath/src/QueryPath/CSS/Parser.php b/lib/querypath/src/QueryPath/CSS/Parser.php new file mode 100644 index 0000000..f041612 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/Parser.php @@ -0,0 +1,575 @@ +<?php +/** + * @file + * + * The CSS parser + */ + +namespace QueryPath\CSS; + +/** + * Parse a CSS selector. + * + * In CSS, a selector is used to identify which element or elements + * in a DOM are being selected for the application of a particular style. + * Effectively, selectors function as a query language for a structured + * document -- almost always HTML or XML. + * + * This class provides an event-based parser for CSS selectors. It can be + * used, for example, as a basis for writing a DOM query engine based on + * CSS. 
+ * + * @ingroup querypath_css + */ +class Parser { + protected $scanner = NULL; + protected $buffer = ''; + protected $handler = NULL; + protected $strict = FALSE; + + protected $DEBUG = FALSE; + + /** + * Construct a new CSS parser object. This will attempt to + * parse the string as a CSS selector. As it parses, it will + * send events to the EventHandler implementation. + */ + public function __construct($string, EventHandler $handler) { + $this->originalString = $string; + $is = new InputStream($string); + $this->scanner = new Scanner($is); + $this->handler = $handler; + } + + /** + * Parse the selector. + * + * This begins an event-based parsing process that will + * fire events as the selector is handled. A EventHandler + * implementation will be responsible for handling the events. + * @throws ParseException + */ + public function parse() { + + $this->scanner->nextToken(); + while ($this->scanner->token !== FALSE) { + // Primitive recursion detection. + $position = $this->scanner->position(); + + if ($this->DEBUG) { + print "PARSE " . $this->scanner->token. "\n"; + } + $this->selector(); + + $finalPosition = $this->scanner->position(); + + if ($this->scanner->token !== FALSE && $finalPosition == $position) { + // If we get here, then the scanner did not pop a single character + // off of the input stream during a full run of the parser, which + // means that the current input does not match any recognizable + // pattern. + throw new ParseException('CSS selector is not well formed.'); + } + + } + + } + + /** + * A restricted parser that can only parse simple selectors. + * The pseudoClass handler for this parser will throw an + * exception if it encounters a pseudo-element or the + * negation pseudo-class. + * + * @deprecated This is not used anywhere in QueryPath and + * may be removed. 
+ *//* + public function parseSimpleSelector() { + while ($this->scanner->token !== FALSE) { + if ($this->DEBUG) print "SIMPLE SELECTOR\n"; + $this->allElements(); + $this->elementName(); + $this->elementClass(); + $this->elementID(); + $this->pseudoClass(TRUE); // Operate in restricted mode. + $this->attribute(); + + // TODO: Need to add failure conditions here. + } + }*/ + + /** + * Handle an entire CSS selector. + */ + private function selector() { + if ($this->DEBUG) print "SELECTOR{$this->scanner->position()}\n"; + $this->consumeWhitespace(); // Remove leading whitespace + $this->simpleSelectors(); + $this->combinator(); + } + + /** + * Consume whitespace and return a count of the number of whitespace consumed. + */ + private function consumeWhitespace() { + if ($this->DEBUG) print "CONSUME WHITESPACE\n"; + $white = 0; + while ($this->scanner->token == Token::white) { + $this->scanner->nextToken(); + ++$white; + } + return $white; + } + + /** + * Handle one of the five combinators: '>', '+', ' ', '~', and ','. + * This will call the appropriate event handlers. + * @see EventHandler::directDescendant(), + * @see EventHandler::adjacent(), + * @see EventHandler::anyDescendant(), + * @see EventHandler::anotherSelector(). + */ + private function combinator() { + if ($this->DEBUG) print "COMBINATOR\n"; + /* + * Problem: ' ' and ' > ' are both valid combinators. + * So we have to track whitespace consumption to see + * if we are hitting the ' ' combinator or if the + * selector just has whitespace padding another combinator. 
+ */ + + // Flag to indicate that post-checks need doing + $inCombinator = FALSE; + $white = $this->consumeWhitespace(); + $t = $this->scanner->token; + + if ($t == Token::rangle) { + $this->handler->directDescendant(); + $this->scanner->nextToken(); + $inCombinator = TRUE; + //$this->simpleSelectors(); + } + elseif ($t == Token::plus) { + $this->handler->adjacent(); + $this->scanner->nextToken(); + $inCombinator = TRUE; + //$this->simpleSelectors(); + } + elseif ($t == Token::comma) { + $this->handler->anotherSelector(); + $this->scanner->nextToken(); + $inCombinator = TRUE; + //$this->scanner->selectors(); + } + elseif ($t == Token::tilde) { + $this->handler->sibling(); + $this->scanner->nextToken(); + $inCombinator = TRUE; + } + + // Check that we don't get two combinators in a row. + if ($inCombinator) { + $white = 0; + if ($this->DEBUG) print "COMBINATOR: " . Token::name($t) . "\n"; + $this->consumeWhitespace(); + if ($this->isCombinator($this->scanner->token)) { + throw new ParseException("Illegal combinator: Cannot have two combinators in sequence."); + } + } + // Check to see if we have whitespace combinator: + elseif ($white > 0) { + if ($this->DEBUG) print "COMBINATOR: any descendant\n"; + $inCombinator = TRUE; + $this->handler->anyDescendant(); + } + else { + if ($this->DEBUG) print "COMBINATOR: no combinator found.\n"; + } + } + + /** + * Check if the token is a combinator. + */ + private function isCombinator($tok) { + $combinators = array(Token::plus, Token::rangle, Token::comma, Token::tilde); + return in_array($tok, $combinators); + } + + /** + * Handle a simple selector. + */ + private function simpleSelectors() { + if ($this->DEBUG) print "SIMPLE SELECTOR\n"; + $this->allElements(); + $this->elementName(); + $this->elementClass(); + $this->elementID(); + $this->pseudoClass(); + $this->attribute(); + } + + /** + * Handles CSS ID selectors. + * This will call EventHandler::elementID(). 
   */
  private function elementID() {
    if ($this->DEBUG) print "ELEMENT ID\n";
    if ($this->scanner->token == Token::octo) {
      $this->scanner->nextToken();
      // The '#' must be followed directly by a name.
      if ($this->scanner->token !== Token::char) {
        throw new ParseException("Expected string after #");
      }
      $id = $this->scanner->getNameString();
      $this->handler->elementID($id);
    }
  }

  /**
   * Handles CSS class selectors.
   * This will call the EventHandler::elementClass() method.
   */
  private function elementClass() {
    if ($this->DEBUG) print "ELEMENT CLASS\n";
    if ($this->scanner->token == Token::dot) {
      $this->scanner->nextToken();
      $this->consumeWhitespace(); // We're very fault tolerant. This should probably throw an error.
      $cssClass = $this->scanner->getNameString();
      $this->handler->elementClass($cssClass);
    }
  }

  /**
   * Handle a pseudo-class and pseudo-element.
   *
   * CSS 3 selectors support separate pseudo-elements, using :: instead
   * of : for separator. This is now supported, and calls the pseudoElement
   * handler, EventHandler::pseudoElement().
   *
   * This will call EventHandler::pseudoClass() when a
   * pseudo-class is parsed.
   *
   * @param boolean $restricted
   *   When TRUE, the 'not' pseudo-class and all pseudo-elements are
   *   rejected with a ParseException.
   * @throws ParseException
   *   For a pseudo-element with arguments, a pseudo-element that is not the
   *   last item of its selector, or a construct that is illegal in
   *   restricted mode.
   */
  private function pseudoClass($restricted = FALSE) {
    if ($this->DEBUG) print "PSEUDO-CLASS\n";
    if ($this->scanner->token == Token::colon) {

      // Check for CSS 3 pseudo element:
      $isPseudoElement = FALSE;
      if ($this->scanner->nextToken() === Token::colon) {
        $isPseudoElement = TRUE;
        $this->scanner->nextToken();
      }

      $name = $this->scanner->getNameString();
      if ($restricted && $name == 'not') {
        throw new ParseException("The 'not' pseudo-class is illegal in this context.");
      }

      $value = NULL;
      if ($this->scanner->token == Token::lparen) {
        // Only pseudo-classes may carry a parenthesized argument.
        if ($isPseudoElement) {
          throw new ParseException("Illegal left paren. Pseudo-Element cannot have arguments.");
        }
        $value = $this->pseudoClassValue();
      }

      // FIXME: This should throw errors when pseudo element has values.
      if ($isPseudoElement) {
        if ($restricted) {
          throw new ParseException("Pseudo-Elements are illegal in this context.");
        }
        $this->handler->pseudoElement($name);
        $this->consumeWhitespace();

        // Per the spec, pseudo-elements must be the last items in a selector, so we
        // check to make sure that we are either at the end of the stream or that a
        // new selector is starting. Only one pseudo-element is allowed per selector.
        if ($this->scanner->token !== FALSE && $this->scanner->token !== Token::comma) {
          throw new ParseException("A Pseudo-Element must be the last item in a selector.");
        }
      }
      else {
        $this->handler->pseudoClass($name, $value);
      }
    }
  }

  /**
   * Get the value of a pseudo-class.
   *
   * @return string
   *   Returns the value found from a pseudo-class. Nothing is returned
   *   when the current token is not a left parenthesis.
   *
   * @todo Pseudoclasses can be passed pseudo-elements and
   * other pseudo-classes as values, which means :pseudo(::pseudo)
   * is legal.
   */
  private function pseudoClassValue() {
    if ($this->scanner->token == Token::lparen) {
      $buf = '';

      // For now, just leave pseudoClass value vague.
      /*
      // We have to peek to see if next char is a colon because
      // pseudo-classes and pseudo-elements are legal strings here.
      print $this->scanner->peek();
      if ($this->scanner->peek() == ':') {
        print "Is pseudo\n";
        $this->scanner->nextToken();

        // Pseudo class
        if ($this->scanner->token == Token::colon) {
          $buf .= ':';
          $this->scanner->nextToken();
          // Pseudo element
          if ($this->scanner->token == Token::colon) {
            $buf .= ':';
            $this->scanner->nextToken();
          }
          // Ident
          $buf .= $this->scanner->getNameString();
        }
      }
      else {
        print "fetching string.\n";
        $buf .= $this->scanner->getQuotedString();
        if ($this->scanner->token != Token::rparen) {
          $this->throwError(Token::rparen, $this->scanner->token);
        }
        $this->scanner->nextToken();
      }
      return $buf;
      */
      //$buf .= $this->scanner->getQuotedString();
      $buf .= $this->scanner->getPseudoClassString();
      return $buf;
    }
  }

  /**
   * Handle element names.
   *
   * This handles:
   * <code>
   *  name (EventHandler::element())
   *  |name (EventHandler::element())
   *  ns|name (EventHandler::elementNS())
   *  ns|* (EventHandler::anyElementInNS())
   * </code>
   *
   * @throws ParseException
   *   If 'ns|' is followed by something other than a name or '*'.
   */
  private function elementName() {
    if ($this->DEBUG) print "ELEMENT NAME\n";
    if ($this->scanner->token === Token::pipe) {
      // We have '|name', which is equiv to 'name'
      $this->scanner->nextToken();
      $this->consumeWhitespace();
      $elementName = $this->scanner->getNameString();
      $this->handler->element($elementName);
    }
    elseif ($this->scanner->token === Token::char) {
      $elementName = $this->scanner->getNameString();
      if ($this->scanner->token == Token::pipe) {
        // Get ns|name: what was read so far is the namespace prefix.
        $elementNS = $elementName;
        $this->scanner->nextToken();
        $this->consumeWhitespace();
        if ($this->scanner->token === Token::star) {
          // We have ns|*
          $this->handler->anyElementInNS($elementNS);
          $this->scanner->nextToken();
        }
        elseif ($this->scanner->token !== Token::char) {
          $this->throwError(Token::char, $this->scanner->token);
        }
        else {
          $elementName = $this->scanner->getNameString();
          // We have ns|name
          $this->handler->elementNS($elementName, $elementNS);
        }

      }
      else {
        $this->handler->element($elementName);
      }
    }
  }

  /**
   * Check for all elements designators. Due to the new CSS 3 namespace
   * support, this is slightly more complicated, now, as it handles
   * the *|name and *|* cases as well as *.
   *
   * Calls EventHandler::anyElement() for '*',
   * EventHandler::anyElementInNS('*') for '*|*', and
   * EventHandler::elementNS($name, '*') for '*|name'.
   */
  private function allElements() {
    if ($this->scanner->token === Token::star) {
      $this->scanner->nextToken();
      if ($this->scanner->token === Token::pipe) {
        $this->scanner->nextToken();
        if ($this->scanner->token === Token::star) {
          // We got *|*. According to spec, this requires
          // that the element has a namespace, so we pass it on
          // to the handler:
          $this->scanner->nextToken();
          $this->handler->anyElementInNS('*');
        }
        else {
          // We got *|name, which means the name MUST be in a namespace,
          // so we pass this off to elementNS().
          $name = $this->scanner->getNameString();
          $this->handler->elementNS($name, '*');
        }
      }
      else {
        $this->handler->anyElement();
      }
    }
  }

  /**
   * Handle an attribute.
   * An attribute can be in one of two forms:
   * <code>[attrName]</code>
   * or
   * <code>[attrName="AttrValue"]</code>
   *
   * This may call the following event handlers: EventHandler::attribute()
   * and, when a namespace prefix (or '*') was given,
   * EventHandler::attributeNS().
   *
   * @throws ParseException
   *   For '@' in strict mode, an operator character not followed by '=',
   *   or a missing closing ']'.
   */
  private function attribute() {
    if($this->scanner->token == Token::lsquare) {
      $attrVal = $op = $ns = NULL;

      $this->scanner->nextToken();
      $this->consumeWhitespace();

      // A leading '@' is tolerated (and skipped) unless strict mode is on.
      if ($this->scanner->token === Token::at) {
        if ($this->strict) {
          throw new ParseException('The @ is illegal in attributes.');
        }
        else {
          $this->scanner->nextToken();
          $this->consumeWhitespace();
        }
      }

      if ($this->scanner->token === Token::star) {
        // Global namespace... requires that attr be prefixed,
        // so we pass this on to a namespace handler.
        $ns = '*';
        $this->scanner->nextToken();
      }
      if ($this->scanner->token === Token::pipe) {
        // Skip this. It's a global namespace.
        $this->scanner->nextToken();
        $this->consumeWhitespace();
      }

      $attrName = $this->scanner->getNameString();
      $this->consumeWhitespace();

      // Check for namespace attribute: ns|attr. We have to peek() to make
      // sure that we haven't hit the |= operator, which looks the same.
      if ($this->scanner->token === Token::pipe && $this->scanner->peek() !== '=') {
        // We have a namespaced attribute.
        $ns = $attrName;
        $this->scanner->nextToken();
        $attrName = $this->scanner->getNameString();
        $this->consumeWhitespace();
      }

      // Note: We require that operators do not have spaces
      // between characters, e.g. ~= , not ~ =.

      // Get the operator. For the two-character operators the nextToken()
      // call in the condition advances onto the '='; $op stays NULL when no
      // operator is present.
      switch ($this->scanner->token) {
        case Token::eq:
          $this->consumeWhitespace();
          $op = EventHandler::isExactly;
          break;
        case Token::tilde:
          if ($this->scanner->nextToken() !== Token::eq) {
            $this->throwError(Token::eq, $this->scanner->token);
          }
          $op = EventHandler::containsWithSpace;
          break;
        case Token::pipe:
          if ($this->scanner->nextToken() !== Token::eq) {
            $this->throwError(Token::eq, $this->scanner->token);
          }
          $op = EventHandler::containsWithHyphen;
          break;
        case Token::star:
          if ($this->scanner->nextToken() !== Token::eq) {
            $this->throwError(Token::eq, $this->scanner->token);
          }
          $op = EventHandler::containsInString;
          break;
        case Token::dollar;
          if ($this->scanner->nextToken() !== Token::eq) {
            $this->throwError(Token::eq, $this->scanner->token);
          }
          $op = EventHandler::endsWith;
          break;
        case Token::carat:
          if ($this->scanner->nextToken() !== Token::eq) {
            $this->throwError(Token::eq, $this->scanner->token);
          }
          $op = EventHandler::beginsWith;
          break;
      }

      if (isset($op)) {
        // Consume '=' and go on.
        $this->scanner->nextToken();
        $this->consumeWhitespace();

        // So... here we have a problem. The grammar suggests that the
        // value here is String1 or String2, both of which are enclosed
        // in quotes of some sort, and both of which allow lots of special
        // characters. But the spec itself includes examples like this:
        //   [lang=fr]
        // So some bareword support is assumed. To get around this, we assume
        // that bare words follow the NAME rules, while quoted strings follow
        // the String1/String2 rules.

        if ($this->scanner->token === Token::quote || $this->scanner->token === Token::squote) {
          $attrVal = $this->scanner->getQuotedString();
        }
        else {
          $attrVal = $this->scanner->getNameString();
        }

        if ($this->DEBUG) {
          print "ATTR: $attrVal AND OP: $op\n";
        }
      }

      $this->consumeWhitespace();

      if ($this->scanner->token != Token::rsquare) {
        $this->throwError(Token::rsquare, $this->scanner->token);
      }

      // Report the attribute; the namespaced form wins whenever a prefix
      // (or '*') was seen.
      if (isset($ns)) {
        $this->handler->attributeNS($attrName, $ns, $attrVal, $op);
      }
      elseif (isset($attrVal)) {
        $this->handler->attribute($attrName, $attrVal, $op);
      }
      else {
        $this->handler->attribute($attrName);
      }
      // Step past the closing ']'.
      $this->scanner->nextToken();
    }
  }

  /**
   * Utility for throwing a consistently-formatted parse error.
   *
   * @param $expected
   *   The token that was required.
   * @param $got
   *   The token that was actually found.
   * @throws ParseException
   *   Always.
   */
  private function throwError($expected, $got) {
    $filter = sprintf('Expected %s, got %s', Token::name($expected), Token::name($got));
    throw new ParseException($filter);
  }

}

diff --git a/lib/querypath/src/QueryPath/CSS/QueryPathEventHandler.php b/lib/querypath/src/QueryPath/CSS/QueryPathEventHandler.php new file mode 100644 index 0000000..2dcfd57 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/QueryPathEventHandler.php @@ -0,0 +1,1424 @@
<?php
/** @file
 * This file contains a full implementation of the EventHandler interface.
 *
 * The tools in this package initiate a CSS selector parsing routine and then
 * handle all of the callbacks.
+ * + * The implementation provided herein adheres to the CSS 3 Selector specification + * with the following caveats: + * + * - The negation (:not()) and containment (:has()) pseudo-classes allow *full* + * selectors and not just simple selectors. + * - There are a variety of additional pseudo-classes supported by this + * implementation that are not part of the spec. Most of the jQuery + * pseudo-classes are supported. The :x-root pseudo-class is also supported. + * - Pseudo-classes that require a User Agent to function have been disabled. + * Thus there is no :hover pseudo-class. + * - All pseudo-elements require the double-colon (::) notation. This breaks + * backward compatibility with the 2.1 spec, but it makes visible the issue + * that pseudo-elements cannot be effectively used with most of the present + * library. They return <b>stdClass objects with a text property</b> (QP > 1.3) + * instead of elements. + * - The pseudo-classes first-of-type, nth-of-type and last-of-type may or may + * not conform to the specification. The spec is unclear. + * - pseudo-class filters of the form -an+b do not function as described in the + * specification. However, they do behave the same way here as they do in + * jQuery. + * - This library DOES provide XML namespace aware tools. Selectors can use + * namespaces to increase specificity. + * - This library does nothing with the CSS 3 Selector specificity rating. Of + * course specificity is preserved (to the best of our abilities), but there + * is no calculation done. + * + * For detailed examples of how the code works and what selectors are supported, + * see the CssEventTests file, which contains the unit tests used for + * testing this implementation. + * + * @author M Butcher <matt@aleph-null.tv> + * @license MIT + */ + +namespace QueryPath\CSS; + +/** + * Handler that tracks progress of a query through a DOM. + * + * The main idea is that we keep a copy of the tree, and then use an + * array to keep track of matches. 
To handle a list of selectors (using + * the comma separator), we have to track both the currently progressing + * match and the previously matched elements. + * + * To use this handler: + * @code + * $filter = '#id'; // Some CSS selector + * $handler = new QueryPathEventHandler(DOMNode $dom); + * $parser = new Parser(); + * $parser->parse($filter, $handler); + * $matches = $handler->getMatches(); + * @endcode + * + * $matches will be an array of zero or more DOMElement objects. + * + * @ingroup querypath_css + */ +class QueryPathEventHandler implements EventHandler, Traverser { + protected $dom = NULL; // Always points to the top level. + protected $matches = NULL; // The matches + protected $alreadyMatched = NULL; // Matches found before current selector. + protected $findAnyElement = TRUE; + + + /** + * Create a new event handler. + */ + public function __construct($dom) { + $this->alreadyMatched = new \SplObjectStorage(); + $matches = new \SplObjectStorage(); + + // Array of DOMElements + if (is_array($dom) || $dom instanceof \SplObjectStorage) { + //$matches = array(); + foreach($dom as $item) { + if ($item instanceof \DOMNode && $item->nodeType == XML_ELEMENT_NODE) { + //$matches[] = $item; + $matches->attach($item); + } + } + //$this->dom = count($matches) > 0 ? $matches[0] : NULL; + if ($matches->count() > 0) { + $matches->rewind(); + $this->dom = $matches->current(); + } + else { + //throw new Exception("Setting DOM to Null"); + $this->dom = NULL; + } + $this->matches = $matches; + } + // DOM Document -- we get the root element. + elseif ($dom instanceof \DOMDocument) { + $this->dom = $dom->documentElement; + $matches->attach($dom->documentElement); + } + // DOM Element -- we use this directly + elseif ($dom instanceof \DOMElement) { + $this->dom = $dom; + $matches->attach($dom); + } + // NodeList -- We turn this into an array + elseif ($dom instanceof \DOMNodeList) { + $a = array(); // Not sure why we are doing this.... 
+ foreach ($dom as $item) { + if ($item->nodeType == XML_ELEMENT_NODE) { + $matches->attach($item); + $a[] = $item; + } + } + $this->dom = $a; + } + // FIXME: Handle SimpleXML! + // Uh-oh... we don't support anything else. + else { + throw new \QueryPath\Exception("Unhandled type: " . get_class($dom)); + } + $this->matches = $matches; + } + + /** + * Generic finding method. + * + * This is the primary searching method used throughout QueryPath. + * + * @param string $filter + * A valid CSS 3 filter. + * @return QueryPathEventHandler + * Returns itself. + */ + public function find($filter) { + $parser = new Parser($filter, $this); + $parser->parse(); + return $this; + } + + /** + * Get the elements that match the evaluated selector. + * + * This should be called after the filter has been parsed. + * + * @return array + * The matched items. This is almost always an array of + * {@link DOMElement} objects. It is always an instance of + * {@link DOMNode} objects. + */ + public function getMatches() { + //$result = array_merge($this->alreadyMatched, $this->matches); + $result = new \SplObjectStorage(); + foreach($this->alreadyMatched as $m) $result->attach($m); + foreach($this->matches as $m) $result->attach($m); + return $result; + } + + public function matches() { + return $this->getMatches(); + } + + /** + * Find any element with the ID that matches $id. + * + * If this finds an ID, it will immediately quit. Essentially, it doesn't + * enforce ID uniqueness, but it assumes it. + * + * @param $id + * String ID for an element. + */ + public function elementID($id) { + $found = new \SplObjectStorage(); + $matches = $this->candidateList(); + foreach ($matches as $item) { + // Check if any of the current items has the desired ID. 
+ if ($item->hasAttribute('id') && $item->getAttribute('id') === $id) { + $found->attach($item); + break; + } + } + $this->matches = $found; + $this->findAnyElement = FALSE; + } + + // Inherited + public function element($name) { + $matches = $this->candidateList(); + $this->findAnyElement = FALSE; + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + // Should the existing item be included? + // In some cases (e.g. element is root element) + // it definitely should. But what about other cases? + if ($item->tagName == $name) { + $found->attach($item); + } + // Search for matching kids. + //$nl = $item->getElementsByTagName($name); + //$found = array_merge($found, $this->nodeListToArray($nl)); + } + + $this->matches = $found; + } + + // Inherited + public function elementNS($lname, $namespace = NULL) { + $this->findAnyElement = FALSE; + $found = new \SplObjectStorage(); + $matches = $this->candidateList(); + foreach ($matches as $item) { + // Looking up NS URI only works if the XMLNS attributes are declared + // at a level equal to or above the searching doc. Normalizing a doc + // should fix this, but it doesn't. So we have to use a fallback + // detection scheme which basically searches by lname and then + // does a post hoc check on the tagname. + + //$nsuri = $item->lookupNamespaceURI($namespace); + $nsuri = $this->dom->lookupNamespaceURI($namespace); + + // XXX: Presumably the base item needs to be checked. 
Spec isn't + // too clear, but there are three possibilities: + // - base should always be checked (what we do here) + // - base should never be checked (only children) + // - base should only be checked if it is the root node + if ($item instanceof \DOMNode + && $item->namespaceURI == $nsuri + && $lname == $item->localName) { + $found->attach($item); + } + + if (!empty($nsuri)) { + $nl = $item->getElementsByTagNameNS($nsuri, $lname); + // If something is found, merge them: + //if (!empty($nl)) $found = array_merge($found, $this->nodeListToArray($nl)); + if (!empty($nl)) $this->attachNodeList($nl, $found); + } + else { + //$nl = $item->getElementsByTagName($namespace . ':' . $lname); + $nl = $item->getElementsByTagName($lname); + $tagname = $namespace . ':' . $lname; + $nsmatches = array(); + foreach ($nl as $node) { + if ($node->tagName == $tagname) { + //$nsmatches[] = $node; + $found->attach($node); + } + } + // If something is found, merge them: + //if (!empty($nsmatches)) $found = array_merge($found, $nsmatches); + } + } + $this->matches = $found; + } + + public function anyElement() { + $found = new \SplObjectStorage(); + //$this->findAnyElement = TRUE; + $matches = $this->candidateList(); + foreach ($matches as $item) { + $found->attach($item); // Add self + // See issue #20 or section 6.2 of this: + // http://www.w3.org/TR/2009/PR-css3-selectors-20091215/#universal-selector + //$nl = $item->getElementsByTagName('*'); + //$this->attachNodeList($nl, $found); + } + + $this->matches = $found; + $this->findAnyElement = FALSE; + } + public function anyElementInNS($ns) { + //$this->findAnyElement = TRUE; + $nsuri = $this->dom->lookupNamespaceURI($ns); + $found = new \SplObjectStorage(); + if (!empty($nsuri)) { + $matches = $this->candidateList(); + foreach ($matches as $item) { + if ($item instanceOf \DOMNode && $nsuri == $item->namespaceURI) { + $found->attach($item); + } + } + } + $this->matches = $found;//UniqueElementList::get($found); + $this->findAnyElement 
= FALSE; + } + public function elementClass($name) { + + $found = new \SplObjectStorage(); + $matches = $this->candidateList(); + foreach ($matches as $item) { + if ($item->hasAttribute('class')) { + $classes = explode(' ', $item->getAttribute('class')); + if (in_array($name, $classes)) $found->attach($item); + } + } + + $this->matches = $found;//UniqueElementList::get($found); + $this->findAnyElement = FALSE; + } + + public function attribute($name, $value = NULL, $operation = EventHandler::isExactly) { + $found = new \SplObjectStorage(); + $matches = $this->candidateList(); + foreach ($matches as $item) { + if ($item->hasAttribute($name)) { + if (isset($value)) { + // If a value exists, then we need a match. + if($this->attrValMatches($value, $item->getAttribute($name), $operation)) { + $found->attach($item); + } + } + else { + // If no value exists, then we consider it a match. + $found->attach($item); + } + } + } + $this->matches = $found; //UniqueElementList::get($found); + $this->findAnyElement = FALSE; + } + + /** + * Helper function to find all elements with exact matches. + * + * @deprecated All use cases seem to be covered by attribute(). + */ + protected function searchForAttr($name, $value = NULL) { + $found = new \SplObjectStorage(); + $matches = $this->candidateList(); + foreach ($matches as $candidate) { + if ($candidate->hasAttribute($name)) { + // If value is required, match that, too. + if (isset($value) && $value == $candidate->getAttribute($name)) { + $found->attach($candidate); + } + // Otherwise, it's a match on name alone. + else { + $found->attach($candidate); + } + } + } + + $this->matches = $found; + } + + public function attributeNS($lname, $ns, $value = NULL, $operation = EventHandler::isExactly) { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + if (count($matches) == 0) { + $this->matches = $found; + return; + } + + // Get the namespace URI for the given label. 
+ //$uri = $matches[0]->lookupNamespaceURI($ns); + $matches->rewind(); + $e = $matches->current(); + $uri = $e->lookupNamespaceURI($ns); + + foreach ($matches as $item) { + //foreach ($item->attributes as $attr) { + // print "$attr->prefix:$attr->localName ($attr->namespaceURI), Value: $attr->nodeValue\n"; + //} + if ($item->hasAttributeNS($uri, $lname)) { + if (isset($value)) { + if ($this->attrValMatches($value, $item->getAttributeNS($uri, $lname), $operation)) { + $found->attach($item); + } + } + else { + $found->attach($item); + } + } + } + $this->matches = $found; + $this->findAnyElement = FALSE; + } + + /** + * This also supports the following nonstandard pseudo classes: + * - :x-reset/:x-root (reset to the main item passed into the constructor. Less drastic than :root) + * - :odd/:even (shorthand for :nth-child(odd)/:nth-child(even)) + */ + public function pseudoClass($name, $value = NULL) { + $name = strtolower($name); + // Need to handle known pseudoclasses. + switch($name) { + case 'visited': + case 'hover': + case 'active': + case 'focus': + case 'animated': // Last 3 are from jQuery + case 'visible': + case 'hidden': + // These require a UA, which we don't have. + case 'target': + // This requires a location URL, which we don't have. + $this->matches = new \SplObjectStorage(); + break; + case 'indeterminate': + // The assumption is that there is a UA and the format is HTML. + // I don't know if this should is useful without a UA. + throw new NotImplementedException(":indeterminate is not implemented."); + break; + case 'lang': + // No value = exception. 
+ if (!isset($value)) { + throw new NotImplementedException("No handler for lang pseudoclass without value."); + } + $this->lang($value); + break; + case 'link': + $this->searchForAttr('href'); + break; + case 'root': + $found = new \SplObjectStorage(); + if (empty($this->dom)) { + $this->matches = $found; + } + elseif (is_array($this->dom)) { + $found->attach($this->dom[0]->ownerDocument->documentElement); + $this->matches = $found; + } + elseif ($this->dom instanceof \DOMNode) { + $found->attach($this->dom->ownerDocument->documentElement); + $this->matches = $found; + } + elseif ($this->dom instanceof \DOMNodeList && $this->dom->length > 0) { + $found->attach($this->dom->item(0)->ownerDocument->documentElement); + $this->matches = $found; + } + else { + // Hopefully we never get here: + $found->attach($this->dom); + $this->matches = $found; + } + break; + + // NON-STANDARD extensions for reseting to the "top" items set in + // the constructor. + case 'x-root': + case 'x-reset': + $this->matches = new \SplObjectStorage(); + $this->matches->attach($this->dom); + break; + + // NON-STANDARD extensions for simple support of even and odd. These + // are supported by jQuery, FF, and other user agents. + case 'even': + $this->nthChild(2, 0); + break; + case 'odd': + $this->nthChild(2, 1); + break; + + // Standard child-checking items. 
+ case 'nth-child': + list($aVal, $bVal) = $this->parseAnB($value); + $this->nthChild($aVal, $bVal); + break; + case 'nth-last-child': + list($aVal, $bVal) = $this->parseAnB($value); + $this->nthLastChild($aVal, $bVal); + break; + case 'nth-of-type': + list($aVal, $bVal) = $this->parseAnB($value); + $this->nthOfTypeChild($aVal, $bVal, FALSE); + break; + case 'nth-last-of-type': + list($aVal, $bVal) = $this->parseAnB($value); + $this->nthLastOfTypeChild($aVal, $bVal); + break; + case 'first-child': + $this->nthChild(0, 1); + break; + case 'last-child': + $this->nthLastChild(0, 1); + break; + case 'first-of-type': + $this->firstOfType(); + break; + case 'last-of-type': + $this->lastOfType(); + break; + case 'only-child': + $this->onlyChild(); + break; + case 'only-of-type': + $this->onlyOfType(); + break; + case 'empty': + $this->emptyElement(); + break; + case 'not': + if (empty($value)) { + throw new ParseException(":not() requires a value."); + } + $this->not($value); + break; + // Additional pseudo-classes defined in jQuery: + case 'lt': + case 'gt': + case 'nth': + case 'eq': + case 'first': + case 'last': + //case 'even': + //case 'odd': + $this->getByPosition($name, $value); + break; + case 'parent': + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $match) { + if (!empty($match->firstChild)) { + $found->attach($match); + } + } + $this->matches = $found; + break; + + case 'enabled': + case 'disabled': + case 'checked': + $this->attribute($name); + break; + case 'text': + case 'radio': + case 'checkbox': + case 'file': + case 'password': + case 'submit': + case 'image': + case 'reset': + case 'button': + $this->attribute('type', $name); + break; + + case 'header': + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $tag = $item->tagName; + $f = strtolower(substr($tag, 0, 1)); + if ($f == 'h' && strlen($tag) == 2 && ctype_digit(substr($tag, 1, 1))) { + 
$found->attach($item); + } + } + $this->matches = $found; + break; + case 'has': + $this->has($value); + break; + // Contains == text matches. + // In QP 2.1, this was changed. + case 'contains': + $value = $this->removeQuotes($value); + + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + if (strpos($item->textContent, $value) !== FALSE) { + $found->attach($item); + } + } + $this->matches = $found; + break; + + // Since QP 2.1 + case 'contains-exactly': + $value = $this->removeQuotes($value); + + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + if ($item->textContent == $value) { + $found->attach($item); + } + } + $this->matches = $found; + break; + default: + throw new ParseException("Unknown Pseudo-Class: " . $name); + } + $this->findAnyElement = FALSE; + } + + /** + * Remove leading and trailing quotes. + */ + private function removeQuotes($str) { + $f = substr($str, 0, 1); + $l = substr($str, -1); + if ($f === $l && ($f == '"' || $f == "'")) { + $str = substr($str, 1, -1); + } + return $str; + } + + /** + * Pseudo-class handler for a variety of jQuery pseudo-classes. + * Handles lt, gt, eq, nth, first, last pseudo-classes. + */ + private function getByPosition($operator, $pos) { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + if ($matches->count() == 0) { + return; + } + + switch ($operator) { + case 'nth': + case 'eq': + if ($matches->count() >= $pos) { + //$found[] = $matches[$pos -1]; + foreach ($matches as $match) { + // CSS is 1-based, so we pre-increment. + if ($matches->key() + 1 == $pos) { + $found->attach($match); + break; + } + } + } + break; + case 'first': + if ($matches->count() > 0) { + $matches->rewind(); // This is necessary to init. + $found->attach($matches->current()); + } + break; + case 'last': + if ($matches->count() > 0) { + + // Spin through iterator. 
+ foreach ($matches as $item) {}; + + $found->attach($item); + } + break; + // case 'even': + // for ($i = 1; $i <= count($matches); ++$i) { + // if ($i % 2 == 0) { + // $found[] = $matches[$i]; + // } + // } + // break; + // case 'odd': + // for ($i = 1; $i <= count($matches); ++$i) { + // if ($i % 2 == 0) { + // $found[] = $matches[$i]; + // } + // } + // break; + case 'lt': + $i = 0; + foreach ($matches as $item) { + if (++$i < $pos) { + $found->attach($item); + } + } + break; + case 'gt': + $i = 0; + foreach ($matches as $item) { + if (++$i > $pos) { + $found->attach($item); + } + } + break; + } + + $this->matches = $found; + } + + /** + * Parse an an+b rule for CSS pseudo-classes. + * @param $rule + * Some rule in the an+b format. + * @return + * Array (list($aVal, $bVal)) of the two values. + * @throws ParseException + * If the rule does not follow conventions. + */ + protected function parseAnB($rule) { + if ($rule == 'even') { + return array(2, 0); + } + elseif ($rule == 'odd') { + return array(2, 1); + } + elseif ($rule == 'n') { + return array(1, 0); + } + elseif (is_numeric($rule)) { + return array(0, (int)$rule); + } + + $rule = explode('n', $rule); + if (count($rule) == 0) { + throw new ParseException("nth-child value is invalid."); + } + + // Each of these is legal: 1, -1, and -. '-' is shorthand for -1. + $aVal = trim($rule[0]); + $aVal = ($aVal == '-') ? -1 : (int)$aVal; + + $bVal = !empty($rule[1]) ? (int)trim($rule[1]) : 0; + return array($aVal, $bVal); + } + + /** + * Pseudo-class handler for nth-child and all related pseudo-classes. + * + * @param int $groupSize + * The size of the group (in an+b, this is a). + * @param int $elementInGroup + * The offset in a group. (in an+b this is b). + * @param boolean $lastChild + * Whether counting should begin with the last child. By default, this is false. + * Pseudo-classes that start with the last-child can set this to true. 
+ */ + protected function nthChild($groupSize, $elementInGroup, $lastChild = FALSE) { + // EXPERIMENTAL: New in Quark. This should be substantially faster + // than the old (jQuery-ish) version. It still has E_STRICT violations + // though. + $parents = new \SplObjectStorage(); + $matches = new \SplObjectStorage(); + + $i = 0; + foreach ($this->matches as $item) { + $parent = $item->parentNode; + + // Build up an array of all of children of this parent, and store the + // index of each element for reference later. We only need to do this + // once per parent, though. + if (!$parents->contains($parent)) { + + $c = 0; + foreach ($parent->childNodes as $child) { + // We only want nodes, and if this call is preceded by an element + // selector, we only want to match elements with the same tag name. + // !!! This last part is a grey area in the CSS 3 Selector spec. It seems + // necessary to make the implementation match the examples in the spec. However, + // jQuery 1.2 does not do this. + if ($child->nodeType == XML_ELEMENT_NODE && ($this->findAnyElement || $child->tagName == $item->tagName)) { + // This may break E_STRICT. + $child->nodeIndex = ++$c; + } + } + // This may break E_STRICT. + $parent->numElements = $c; + $parents->attach($parent); + } + + // If we are looking for the last child, we count from the end of a list. + // Note that we add 1 because CSS indices begin at 1, not 0. + if ($lastChild) { + $indexToMatch = $item->parentNode->numElements - $item->nodeIndex + 1; + } + // Otherwise we count from the beginning of the list. + else { + $indexToMatch = $item->nodeIndex; + } + + // If group size is 0, then we return element at the right index. + if ($groupSize == 0) { + if ($indexToMatch == $elementInGroup) + $matches->attach($item); + } + // If group size != 0, then we grab nth element from group offset by + // element in group. 
+ else { + if (($indexToMatch - $elementInGroup) % $groupSize == 0 + && ($indexToMatch - $elementInGroup) / $groupSize >= 0) { + $matches->attach($item); + } + } + + // Iterate. + ++$i; + } + $this->matches = $matches; + } + + /** + * Reverse a set of matches. + * + * This is now necessary because internal matches are no longer represented + * as arrays. + * @since QueryPath 2.0 + *//* + private function reverseMatches() { + // Reverse the candidate list. There must be a better way of doing + // this. + $arr = array(); + foreach ($this->matches as $m) array_unshift($arr, $m); + + $this->found = new \SplObjectStorage(); + foreach ($arr as $item) $this->found->attach($item); + }*/ + + /** + * Pseudo-class handler for :nth-last-child and related pseudo-classes. + */ + protected function nthLastChild($groupSize, $elementInGroup) { + // New in Quark. + $this->nthChild($groupSize, $elementInGroup, TRUE); + } + + /** + * Get a list of peer elements. + * If $requireSameTag is TRUE, then only peer elements with the same + * tagname as the given element will be returned. + * + * @param $element + * A DomElement. + * @param $requireSameTag + * Boolean flag indicating whether all matches should have the same + * element name (tagName) as $element. + * @return + * Array of peer elements. + *//* + protected function listPeerElements($element, $requireSameTag = FALSE) { + $peers = array(); + $parent = $element->parentNode; + foreach ($parent->childNodes as $node) { + if ($node->nodeType == XML_ELEMENT_NODE) { + if ($requireSameTag) { + // Need to make sure that the tag matches: + if ($element->tagName == $node->tagName) { + $peers[] = $node; + } + } + else { + $peers[] = $node; + } + } + } + return $peers; + } + */ + /** + * Get the nth child (by index) from matching candidates. + * + * This is used by pseudo-class handlers. 
+ */ + /* + protected function childAtIndex($index, $tagName = NULL) { + $restrictToElement = !$this->findAnyElement; + $matches = $this->candidateList(); + $defaultTagName = $tagName; + + // XXX: Added in Quark: I believe this should return an empty + // match set if no child was found tat the index. + $this->matches = new \SplObjectStorage(); + + foreach ($matches as $item) { + $parent = $item->parentNode; + + // If a default tag name is supplied, we always use it. + if (!empty($defaultTagName)) { + $tagName = $defaultTagName; + } + // If we are inside of an element selector, we use the + // tag name of the given elements. + elseif ($restrictToElement) { + $tagName = $item->tagName; + } + // Otherwise, we skip the tag name match. + else { + $tagName = NULL; + } + + // Loop through all children looking for matches. + $i = 0; + foreach ($parent->childNodes as $child) { + if ($child->nodeType !== XML_ELEMENT_NODE) { + break; // Skip non-elements + } + + // If type is set, then we do type comparison + if (!empty($tagName)) { + // Check whether tag name matches the type. + if ($child->tagName == $tagName) { + // See if this is the index we are looking for. + if ($i == $index) { + //$this->matches = new \SplObjectStorage(); + $this->matches->attach($child); + return; + } + // If it's not the one we are looking for, increment. + ++$i; + } + } + // We don't care about type. Any tagName will match. + else { + if ($i == $index) { + $this->matches->attach($child); + return; + } + ++$i; + } + } // End foreach + } + + }*/ + + /** + * Pseudo-class handler for nth-of-type-child. + * Not implemented. + */ + protected function nthOfTypeChild($groupSize, $elementInGroup, $lastChild) { + // EXPERIMENTAL: New in Quark. This should be substantially faster + // than the old (jQuery-ish) version. It still has E_STRICT violations + // though. 
+ $parents = new \SplObjectStorage(); + $matches = new \SplObjectStorage(); + + $i = 0; + foreach ($this->matches as $item) { + $parent = $item->parentNode; + + // Build up an array of all of children of this parent, and store the + // index of each element for reference later. We only need to do this + // once per parent, though. + if (!$parents->contains($parent)) { + + $c = 0; + foreach ($parent->childNodes as $child) { + // This doesn't totally make sense, since the CSS 3 spec does not require that + // this pseudo-class be adjoined to an element (e.g. ' :nth-of-type' is allowed). + if ($child->nodeType == XML_ELEMENT_NODE && $child->tagName == $item->tagName) { + // This may break E_STRICT. + $child->nodeIndex = ++$c; + } + } + // This may break E_STRICT. + $parent->numElements = $c; + $parents->attach($parent); + } + + // If we are looking for the last child, we count from the end of a list. + // Note that we add 1 because CSS indices begin at 1, not 0. + if ($lastChild) { + $indexToMatch = $item->parentNode->numElements - $item->nodeIndex + 1; + } + // Otherwise we count from the beginning of the list. + else { + $indexToMatch = $item->nodeIndex; + } + + // If group size is 0, then we return element at the right index. + if ($groupSize == 0) { + if ($indexToMatch == $elementInGroup) + $matches->attach($item); + } + // If group size != 0, then we grab nth element from group offset by + // element in group. + else { + if (($indexToMatch - $elementInGroup) % $groupSize == 0 + && ($indexToMatch - $elementInGroup) / $groupSize >= 0) { + $matches->attach($item); + } + } + + // Iterate. + ++$i; + } + $this->matches = $matches; + } + + /** + * Pseudo-class handler for nth-last-of-type-child. + * Not implemented. 
+ */ + protected function nthLastOfTypeChild($groupSize, $elementInGroup) { + $this->nthOfTypeChild($groupSize, $elementInGroup, TRUE); + } + + /** + * Pseudo-class handler for :lang + */ + protected function lang($value) { + // TODO: This checks for cases where an explicit language is + // set. The spec seems to indicate that an element should inherit + // language from the parent... but this is unclear. + $operator = (strpos($value, '-') !== FALSE) ? self::isExactly : self::containsWithHyphen; + + $orig = $this->matches; + $origDepth = $this->findAnyElement; + + // Do first pass: attributes in default namespace + $this->attribute('lang', $value, $operator); + $lang = $this->matches; // Temp array for merging. + + // Reset + $this->matches = $orig; + $this->findAnyElement = $origDepth; + + // Do second pass: attributes in 'xml' namespace. + $this->attributeNS('lang', 'xml', $value, $operator); + + + // Merge results. + // FIXME: Note that we lose natural ordering in + // the document because we search for xml:lang separately + // from lang. + foreach ($this->matches as $added) $lang->attach($added); + $this->matches = $lang; + } + + /** + * Pseudo-class handler for :not(filter). + * + * This does not follow the specification in the following way: The CSS 3 + * selector spec says the value of not() must be a simple selector. This + * function allows complex selectors. + * + * @param string $filter + * A CSS selector. + */ + protected function not($filter) { + $matches = $this->candidateList(); + //$found = array(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $handler = new QueryPathEventHandler($item); + $not_these = $handler->find($filter)->getMatches(); + if ($not_these->count() == 0) { + $found->attach($item); + } + } + // No need to check for unique elements, since the list + // we began from already had no duplicates. + $this->matches = $found; + } + + /** + * Pseudo-class handler for :has(filter). 
+ * This can also be used as a general filtering routine. + */ + public function has($filter) { + $matches = $this->candidateList(); + //$found = array(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $handler = new QueryPathEventHandler($item); + $these = $handler->find($filter)->getMatches(); + if (count($these) > 0) { + $found->attach($item); + } + } + $this->matches = $found; + return $this; + } + + /** + * Pseudo-class handler for :first-of-type. + */ + protected function firstOfType() { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $type = $item->tagName; + $parent = $item->parentNode; + foreach ($parent->childNodes as $kid) { + if ($kid->nodeType == XML_ELEMENT_NODE && $kid->tagName == $type) { + if (!$found->contains($kid)) { + $found->attach($kid); + } + break; + } + } + } + $this->matches = $found; + } + + /** + * Pseudo-class handler for :last-of-type. + */ + protected function lastOfType() { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $type = $item->tagName; + $parent = $item->parentNode; + for ($i = $parent->childNodes->length - 1; $i >= 0; --$i) { + $kid = $parent->childNodes->item($i); + if ($kid->nodeType == XML_ELEMENT_NODE && $kid->tagName == $type) { + if (!$found->contains($kid)) { + $found->attach($kid); + } + break; + } + } + } + $this->matches = $found; + } + + /** + * Pseudo-class handler for :only-child. + */ + protected function onlyChild() { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach($matches as $item) { + $parent = $item->parentNode; + $kids = array(); + foreach($parent->childNodes as $kid) { + if ($kid->nodeType == XML_ELEMENT_NODE) { + $kids[] = $kid; + } + } + // There should be only one child element, and + // it should be the one being tested. 
+ if (count($kids) == 1 && $kids[0] === $item) { + $found->attach($kids[0]); + } + } + $this->matches = $found; + } + + /** + * Pseudo-class handler for :empty. + */ + protected function emptyElement() { + $found = new \SplObjectStorage(); + $matches = $this->candidateList(); + foreach ($matches as $item) { + $empty = TRUE; + foreach($item->childNodes as $kid) { + // From the spec: Elements and Text nodes are the only ones to + // affect emptiness. + if ($kid->nodeType == XML_ELEMENT_NODE || $kid->nodeType == XML_TEXT_NODE) { + $empty = FALSE; + break; + } + } + if ($empty) { + $found->attach($item); + } + } + $this->matches = $found; + } + + /** + * Pseudo-class handler for :only-of-type. + */ + protected function onlyOfType() { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + if (!$item->parentNode) { + $this->matches = new \SplObjectStorage(); + } + $parent = $item->parentNode; + $onlyOfType = TRUE; + + // See if any peers are of the same type + foreach($parent->childNodes as $kid) { + if ($kid->nodeType == XML_ELEMENT_NODE + && $kid->tagName == $item->tagName + && $kid !== $item) { + //$this->matches = new \SplObjectStorage(); + $onlyOfType = FALSE; + break; + } + } + + // If no others were found, attach this one. + if ($onlyOfType) $found->attach($item); + } + $this->matches = $found; + } + + /** + * Check for attr value matches based on an operation. + */ + protected function attrValMatches($needle, $haystack, $operation) { + + if (strlen($haystack) < strlen($needle)) return FALSE; + + // According to the spec: + // "The case-sensitivity of attribute names in selectors depends on the document language." + // (6.3.2) + // To which I say, "huh?". We assume case sensitivity. 
+ switch ($operation) { + case EventHandler::isExactly: + return $needle == $haystack; + case EventHandler::containsWithSpace: + return in_array($needle, explode(' ', $haystack)); + case EventHandler::containsWithHyphen: + return in_array($needle, explode('-', $haystack)); + case EventHandler::containsInString: + return strpos($haystack, $needle) !== FALSE; + case EventHandler::beginsWith: + return strpos($haystack, $needle) === 0; + case EventHandler::endsWith: + //return strrpos($haystack, $needle) === strlen($needle) - 1; + return preg_match('/' . $needle . '$/', $haystack) == 1; + } + return FALSE; // Shouldn't be able to get here. + } + + /** + * As the spec mentions, these must be at the end of a selector or + * else they will cause errors. Most selectors return elements. Pseudo-elements + * do not. + */ + public function pseudoElement($name) { + // process the pseudoElement + switch ($name) { + // XXX: Should this return an array -- first line of + // each of the matched elements? + case 'first-line': + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + $o = new \stdClass(); + foreach ($matches as $item) { + $str = $item->textContent; + $lines = explode("\n", $str); + if (!empty($lines)) { + $line = trim($lines[0]); + if (!empty($line)) { + $o->textContent = $line; + $found->attach($o);//trim($lines[0]); + } + } + } + $this->matches = $found; + break; + // XXX: Should this return an array -- first letter of each + // of the matched elements? + case 'first-letter': + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + $o = new \stdClass(); + foreach ($matches as $item) { + $str = $item->textContent; + if (!empty($str)) { + $str = substr($str,0, 1); + $o->textContent = $str; + $found->attach($o); + } + } + $this->matches = $found; + break; + case 'before': + case 'after': + // There is nothing in a DOM to return for the before and after + // selectors. 
+ case 'selection': + // With no user agent, we don't have a concept of user selection. + throw new NotImplementedException("The $name pseudo-element is not implemented."); + break; + } + $this->findAnyElement = FALSE; + } + public function directDescendant() { + $this->findAnyElement = FALSE; + + $kids = new \SplObjectStorage(); + foreach ($this->matches as $item) { + $kidsNL = $item->childNodes; + foreach ($kidsNL as $kidNode) { + if ($kidNode->nodeType == XML_ELEMENT_NODE) { + $kids->attach($kidNode); + } + } + } + $this->matches = $kids; + } + /** + * For an element to be adjacent to another, it must be THE NEXT NODE + * in the node list. So if an element is surrounded by pcdata, there are + * no adjacent nodes. E.g. in <a/>FOO<b/>, the a and b elements are not + * adjacent. + * + * In a strict DOM parser, line breaks and empty spaces are nodes. That means + * nodes like this will not be adjacent: <test/> <test/>. The space between + * them makes them non-adjacent. If this is not the desired behavior, pass + * in the appropriate flags to your parser. Example: + * <code> + * $doc = new DomDocument(); + * $doc->loadXML('<test/> <test/>', LIBXML_NOBLANKS); + * </code> + */ + public function adjacent() { + $this->findAnyElement = FALSE; + // List of nodes that are immediately adjacent to the current one. + //$found = array(); + $found = new \SplObjectStorage(); + foreach ($this->matches as $item) { + while (isset($item->nextSibling)) { + if (isset($item->nextSibling) && $item->nextSibling->nodeType === XML_ELEMENT_NODE) { + $found->attach($item->nextSibling); + break; + } + $item = $item->nextSibling; + } + } + $this->matches = $found; + } + + public function anotherSelector() { + $this->findAnyElement = FALSE; + // Copy old matches into buffer. 
+ if ($this->matches->count() > 0) { + //$this->alreadyMatched = array_merge($this->alreadyMatched, $this->matches); + foreach ($this->matches as $item) $this->alreadyMatched->attach($item); + } + + // Start over at the top of the tree. + $this->findAnyElement = TRUE; // Reset depth flag. + $this->matches = new \SplObjectStorage(); + $this->matches->attach($this->dom); + } + + /** + * Get all nodes that are siblings to currently selected nodes. + * + * If two passed in items are siblings of each other, neither will + * be included in the list of siblings. Their status as being candidates + * excludes them from being considered siblings. + */ + public function sibling() { + $this->findAnyElement = FALSE; + // Get the nodes at the same level. + + if ($this->matches->count() > 0) { + $sibs = new \SplObjectStorage(); + foreach ($this->matches as $item) { + /*$candidates = $item->parentNode->childNodes; + foreach ($candidates as $candidate) { + if ($candidate->nodeType === XML_ELEMENT_NODE && $candidate !== $item) { + $sibs->attach($candidate); + } + } + */ + while ($item->nextSibling != NULL) { + $item = $item->nextSibling; + if ($item->nodeType === XML_ELEMENT_NODE) $sibs->attach($item); + } + } + $this->matches = $sibs; + } + } + + /** + * Get any descendant. + */ + public function anyDescendant() { + // Get children: + $found = new \SplObjectStorage(); + foreach ($this->matches as $item) { + $kids = $item->getElementsByTagName('*'); + //$found = array_merge($found, $this->nodeListToArray($kids)); + $this->attachNodeList($kids, $found); + } + $this->matches = $found; + + // Set depth flag: + $this->findAnyElement = TRUE; + } + + /** + * Determine what candidates are in the current scope. + * + * This is a utility method that gets the list of elements + * that should be evaluated in the context. If $this->findAnyElement + * is TRUE, this will return a list of every element that appears in + * the subtree of $this->matches. 
Otherwise, it will just return + * $this->matches. + */ + private function candidateList() { + if ($this->findAnyElement) { + return $this->getAllCandidates($this->matches); + } + return $this->matches; + } + + /** + * Get a list of all of the candidate elements. + * + * This is used when $this->findAnyElement is TRUE. + * @param $elements + * A list of current elements (usually $this->matches). + * + * @return + * A list of all candidate elements. + */ + private function getAllCandidates($elements) { + $found = new \SplObjectStorage(); + foreach ($elements as $item) { + $found->attach($item); // put self in + $nl = $item->getElementsByTagName('*'); + //foreach ($nl as $node) $found[] = $node; + $this->attachNodeList($nl, $found); + } + return $found; + } + /* + public function nodeListToArray($nodeList) { + $array = array(); + foreach ($nodeList as $node) { + if ($node->nodeType == XML_ELEMENT_NODE) { + $array[] = $node; + } + } + return $array; + } + */ + + /** + * Attach all nodes in a node list to the given \SplObjectStorage. + */ + public function attachNodeList(\DOMNodeList $nodeList, \SplObjectStorage $splos) { + foreach ($nodeList as $item) $splos->attach($item); + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/Scanner.php b/lib/querypath/src/QueryPath/CSS/Scanner.php new file mode 100644 index 0000000..3513a0b --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/Scanner.php @@ -0,0 +1,306 @@ +<?php +/** @file + * The scanner. + */ +namespace QueryPath\CSS; +/** + * Scanner for CSS selector parsing. + * + * This provides a simple scanner for traversing an input stream. + * + * @ingroup querypath_css + */ +final class Scanner { + var $is = NULL; + public $value = NULL; + public $token = NULL; + + var $recurse = FALSE; + var $it = 0; + + /** + * Given a new input stream, tokenize the CSS selector string. + * @see InputStream + * @param InputStream $in + * An input stream to be scanned. 
+ */ + public function __construct(InputStream $in) { + $this->is = $in; + } + + /** + * Return the position of the reader in the string. + */ + public function position() { + return $this->is->position; + } + + /** + * See the next char without removing it from the stack. + * + * @return char + * Returns the next character on the stack. + */ + public function peek() { + return $this->is->peek(); + } + + /** + * Get the next token in the input stream. + * + * This sets the current token to the value of the next token in + * the stream. + * + * @return int + * Returns an int value corresponding to one of the Token constants, + * or FALSE if the end of the string is reached. (Remember to use + * strong equality checking on FALSE, since 0 is a valid token id.) + */ + public function nextToken() { + $tok = -1; + ++$this->it; + if ($this->is->isEmpty()) { + if ($this->recurse) { + throw new \QueryPath\Exception("Recursion error detected at iteration " . $this->it . '.'); + exit(); + } + //print "{$this->it}: All done\n"; + $this->recurse = TRUE; + $this->token = FALSE; + return FALSE; + } + $ch = $this->is->consume(); + //print __FUNCTION__ . " Testing $ch.\n"; + if (ctype_space($ch)) { + $this->value = ' '; // Collapse all WS to a space. 
+ $this->token = $tok = Token::white; + //$ch = $this->is->consume(); + return $tok; + } + + if (ctype_alnum($ch) || $ch == '-' || $ch == '_') { + // It's a character + $this->value = $ch; //strtolower($ch); + $this->token = $tok = Token::char; + return $tok; + } + + $this->value = $ch; + + switch($ch) { + case '*': + $tok = Token::star; + break; + case chr(ord('>')): + $tok = Token::rangle; + break; + case '.': + $tok = Token::dot; + break; + case '#': + $tok = Token::octo; + break; + case '[': + $tok = Token::lsquare; + break; + case ']': + $tok = Token::rsquare; + break; + case ':': + $tok = Token::colon; + break; + case '(': + $tok = Token::lparen; + break; + case ')': + $tok = Token::rparen; + break; + case '+': + $tok = Token::plus; + break; + case '~': + $tok = Token::tilde; + break; + case '=': + $tok = Token::eq; + break; + case '|': + $tok = Token::pipe; + break; + case ',': + $tok = Token::comma; + break; + case chr(34): + $tok = Token::quote; + break; + case "'": + $tok = Token::squote; + break; + case '\\': + $tok = Token::bslash; + break; + case '^': + $tok = Token::carat; + break; + case '$': + $tok = Token::dollar; + break; + case '@': + $tok = Token::at; + break; + } + + + // Catch all characters that are legal within strings. + if ($tok == -1) { + // TODO: This should be UTF-8 compatible, but PHP doesn't + // have a native UTF-8 string. Should we use external + // mbstring library? + + $ord = ord($ch); + // Characters in this pool are legal for use inside of + // certain strings. Extended ASCII is used here, though I + // Don't know if these are really legal. + if (($ord >= 32 && $ord <= 126) || ($ord >= 128 && $ord <= 255)) { + $tok = Token::stringLegal; + } + else { + throw new ParseException('Illegal character found in stream: ' . $ord); + } + } + + $this->token = $tok; + return $tok; + } + + /** + * Get a name string from the input stream. 
+ * A name string must be composed of + * only characters defined in Token:char: -_a-zA-Z0-9 + */ + public function getNameString() { + $buf = ''; + while ($this->token === Token::char) { + $buf .= $this->value; + $this->nextToken(); + //print '_'; + } + return $buf; + } + + /** + * This gets a string with any legal 'string' characters. + * See CSS Selectors specification, section 11, for the + * definition of string. + * + * This will check for string1, string2, and the case where a + * string is unquoted (Oddly absent from the "official" grammar, + * though such strings are present as examples in the spec.) + * + * Note: + * Though the grammar supplied by CSS 3 Selectors section 11 does not + * address the contents of a pseudo-class value, the spec itself indicates + * that a pseudo-class value is a "value between parenthesis" [6.6]. The + * examples given use URLs among other things, making them closer to the + * definition of 'string' than to 'name'. So we handle them here as strings. + */ + public function getQuotedString() { + if ($this->token == Token::quote || $this->token == Token::squote || $this->token == Token::lparen) { + $end = ($this->token == Token::lparen) ? Token::rparen : $this->token; + $buf = ''; + $escape = FALSE; + + $this->nextToken(); // Skip the opening quote/paren + + // The second conjunct is probably not necessary. + while ($this->token !== FALSE && $this->token > -1) { + //print "Char: $this->value \n"; + if ($this->token == Token::bslash && !$escape) { + // XXX: The backslash (\) is removed here. + // Turn on escaping. + //$buf .= $this->value; + $escape = TRUE; + } + elseif ($escape) { + // Turn off escaping + $buf .= $this->value; + $escape = FALSE; + } + elseif ($this->token === $end) { + // At end of string; skip token and break. + $this->nextToken(); + break; + } + else { + // Append char. + $buf .= $this->value; + } + $this->nextToken(); + } + return $buf; + } + } + + // Get the contents inside of a pseudoClass(). 
+ public function getPseudoClassString() { + if ($this->token == Token::quote || $this->token == Token::squote || $this->token == Token::lparen) { + $end = ($this->token == Token::lparen) ? Token::rparen : $this->token; + $buf = ''; + $escape = FALSE; + + $this->nextToken(); // Skip the opening quote/paren + + // The second conjunct is probably not necessary. + while ($this->token !== FALSE && $this->token > -1) { + //print "Char: $this->value \n"; + if ($this->token == Token::bslash && !$escape) { + // XXX: The backslash (\) is removed here. + // Turn on escaping. + //$buf .= $this->value; + $escape = TRUE; + } + elseif ($escape) { + // Turn off escaping + $buf .= $this->value; + $escape = FALSE; + } + // Allow nested pseudoclasses. + elseif ($this->token == Token::lparen) { + $buf .= "("; + $buf .= $this->getPseudoClassString(); + $buf .= ")"; + } + elseif ($this->token === $end) { + // At end of string; skip token and break. + $this->nextToken(); + break; + } + else { + // Append char. + $buf .= $this->value; + } + $this->nextToken(); + } + return $buf; + } + } + + /** + * Get a string from the input stream. + * This is a convenience function for getting a string of + * characters that are either alphanumber or whitespace. See + * the Token::white and Token::char definitions. + * + * @deprecated This is not used anywhere in QueryPath. + *//* + public function getStringPlusWhitespace() { + $buf = ''; + if($this->token === FALSE) {return '';} + while ($this->token === Token::char || $this->token == Token::white) { + $buf .= $this->value; + $this->nextToken(); + } + return $buf; + }*/ + +} diff --git a/lib/querypath/src/QueryPath/CSS/Selector.php b/lib/querypath/src/QueryPath/CSS/Selector.php new file mode 100644 index 0000000..4b538bd --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/Selector.php @@ -0,0 +1,144 @@ +<?php +/** @file + * A selector. + */ + +namespace QueryPath\CSS; + +/** + * A CSS Selector. 
+ * + * A CSS selector is made up of one or more Simple Selectors + * (SimpleSelector). + * + * @attention + * The Selector data structure is a LIFO (Last in, First out). This is + * because CSS selectors are best processed "bottom up". Thus, when + * iterating over 'a>b>c', the iterator will produce: + * - c + * - b + * - a + * It is assumed, therefore, that any suitable querying engine will + * traverse from the bottom (`c`) back up. + * + * @b Usage + * + * This class is an event handler. It can be plugged into a Parser and + * receive the events the Parser generates. + * + * This class is also an iterator. Once the parser has completed, the + * captured selectors can be iterated over. + * + * @code + * <?php + * $selectorList = new \QueryPath\CSS\Selector(); + * $parser = new \QueryPath\CSS\Parser($selector, $selectorList); + * + * $parser->parse(); + * + * foreach ($selectorList as $simpleSelector) { + * // Do something with the SimpleSelector. + * print_r($simpleSelector); + * } + * ?> + * @endcode + * + * + * @since QueryPath 3.0.0 + */ +class Selector implements EventHandler, \IteratorAggregate, \Countable { + protected $selectors = array(); + protected $currSelector; + protected $selectorGroups = array(); + protected $groupIndex = 0; + + public function __construct() { + $this->currSelector = new SimpleSelector(); + + $this->selectors[$this->groupIndex][] = $this->currSelector; + } + + public function getIterator() { + return new \ArrayIterator($this->selectors); + } + + /** + * Get the array of SimpleSelector objects. + * + * Normally, one iterates over a Selector. However, if it is + * necessary to get the selector array and manipulate it, this + * method can be used. 
+ */ + public function toArray() { + return $this->selectors; + } + + public function count() { + return count($this->selectors); + } + + public function elementID($id) { + $this->currSelector->id = $id; + } + public function element($name) { + $this->currSelector->element = $name; + } + public function elementNS($name, $namespace = NULL) { + $this->currSelector->ns = $namespace; + $this->currSelector->element = $name; + } + public function anyElement() { + $this->currSelector->element = '*'; + } + public function anyElementInNS($ns) { + $this->currSelector->ns = $ns; + $this->currSelector->element = '*'; + } + public function elementClass($name) { + $this->currSelector->classes[] = $name; + } + public function attribute($name, $value = NULL, $operation = EventHandler::isExactly) { + $this->currSelector->attributes[] = array( + 'name' => $name, + 'value' => $value, + 'op' => $operation, + ); + } + public function attributeNS($name, $ns, $value = NULL, $operation = EventHandler::isExactly) { + $this->currSelector->attributes[] = array( + 'name' => $name, + 'value' => $value, + 'op' => $operation, + 'ns' => $ns, + ); + } + public function pseudoClass($name, $value = NULL) { + $this->currSelector->pseudoClasses[] = array('name' => $name, 'value' => $value); + } + public function pseudoElement($name) { + $this->currSelector->pseudoElements[] = $name; + } + public function combinator($combinatorName) { + $this->currSelector->combinator = $combinatorName; + $this->currSelector = new SimpleSelector(); + array_unshift($this->selectors[$this->groupIndex], $this->currSelector); + //$this->selectors[]= $this->currSelector; + } + public function directDescendant() { + $this->combinator(SimpleSelector::directDescendant); + } + public function adjacent() { + $this->combinator(SimpleSelector::adjacent); + } + public function anotherSelector() { + $this->groupIndex++; + $this->currSelector = new SimpleSelector(); + $this->selectors[$this->groupIndex] = array($this->currSelector); 
+ } + public function sibling() { + $this->combinator(SimpleSelector::sibling); + } + public function anyDescendant() { + $this->combinator(SimpleSelector::anyDescendant); + } +} diff --git a/lib/querypath/src/QueryPath/CSS/SimpleSelector.php b/lib/querypath/src/QueryPath/CSS/SimpleSelector.php new file mode 100644 index 0000000..3fcc796 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/SimpleSelector.php @@ -0,0 +1,138 @@ +<?php +/** @file + * + * A simple selector. + * + */ + +namespace QueryPath\CSS; + +/** + * Models a simple selector. + * + * CSS Selectors are composed of one or more simple selectors, where + * each simple selector may have any of the following components: + * + * - An element name (or wildcard *) + * - An ID (#foo) + * - One or more classes (.foo.bar) + * - One or more attribute matchers ([foo=bar]) + * - One or more pseudo-classes (:foo) + * - One or more pseudo-elements (::first) + * + * For performance reasons, this object has been kept as sparse as + * possible. 
+ * + * @since QueryPath 3.x + * @author M Butcher + * + */ +class SimpleSelector { + + const adjacent = 1; + const directDescendant = 2; + const anotherSelector = 4; + const sibling = 8; + const anyDescendant = 16; + + public $element; + public $ns; + public $id; + public $classes = array(); + public $attributes = array(); + public $pseudoClasses = array(); + public $pseudoElements = array(); + public $combinator; + + public static function attributeOperator($code) { + switch($code) { + case EventHandler::containsWithSpace: + return '~='; + case EventHandler::containsWithHyphen: + return '|='; + case EventHandler::containsInString: + return '*='; + case EventHandler::beginsWith: + return '^='; + case EventHandler::endsWith: + return '$='; + default: + return '='; + } + } + + public static function combinatorOperator($code) { + switch ($code) { + case self::adjacent: + return '+'; + case self::directDescendant: + return '>'; + case self::sibling: + return '~'; + case self::anotherSelector: + return ', '; + case self::anyDescendant: + return ' '; + } + } + + public function __construct() { + } + + public function notEmpty() { // TRUE as soon as any component of this selector has been populated. + return !empty($this->element) + || !empty($this->id) + || !empty($this->classes) + || !empty($this->combinator) + || !empty($this->attributes) + || !empty($this->pseudoClasses) + || !empty($this->pseudoElements) + ; + } + + public function __tostring() { + $buffer = array(); + try { + + if (!empty($this->ns)) { + $buffer[] = $this->ns; $buffer[] = '|'; + } + if (!empty($this->element)) $buffer[] = $this->element; + if (!empty($this->id)) $buffer[] = '#' . $this->id; + if (!empty($this->attributes)) { + foreach ($this->attributes as $attr) { + $buffer[] = '['; + if(!empty($attr['ns'])) $buffer[] = $attr['ns'] . '|'; + $buffer[] = $attr['name']; + if (!empty($attr['value'])) { + $buffer[] = self::attributeOperator($attr['op']); + $buffer[] = $attr['value']; + } + $buffer[] = ']'; + } + } + if (!empty($this->pseudoClasses)) { + foreach ($this->pseudoClasses as $ps) { + $buffer[] = ':' . 
$ps['name']; + if (isset($ps['value'])) { + $buffer[] = '(' . $ps['value'] . ')'; + } + } + } + foreach ($this->pseudoElements as $pe) { + $buffer[] = '::' . $pe; + } + + if (!empty($this->combinator)) { + $buffer[] = self::combinatorOperator($this->combinator); + } + + } + catch (\Exception $e) { + return $e->getMessage(); + } + + return implode('', $buffer); + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/Token.php b/lib/querypath/src/QueryPath/CSS/Token.php new file mode 100644 index 0000000..3c31ef4 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/Token.php @@ -0,0 +1,60 @@ +<?php +/** @file + * Parser tokens. + */ +namespace QueryPath\CSS; +/** + * Tokens for CSS. + * This class defines the recognized tokens for the parser, and also + * provides utility functions for error reporting. + * + * @ingroup querypath_css + */ +final class Token { + const char = 0; + const star = 1; + const rangle = 2; + const dot = 3; + const octo = 4; + const rsquare = 5; + const lsquare = 6; + const colon = 7; + const rparen = 8; + const lparen = 9; + const plus = 10; + const tilde = 11; + const eq = 12; + const pipe = 13; + const comma = 14; + const white = 15; + const quote = 16; + const squote = 17; + const bslash = 18; + const carat = 19; + const dollar = 20; + const at = 21; // This is not in the spec. Apparently, old broken CSS uses it. + + // In legal range for string. + const stringLegal = 99; + + /** + * Get a name for a given constant. Used for error handling. 
+ */ + static function name($const_int) { + $a = array('character', 'star', 'right angle bracket', + 'dot', 'octothorp', 'right square bracket', 'left square bracket', + 'colon', 'right parenthesis', 'left parenthesis', 'plus', 'tilde', + 'equals', 'vertical bar', 'comma', 'space', 'quote', 'single quote', + 'backslash', 'carat', 'dollar', 'at'); + if (isset($a[$const_int]) && is_numeric($const_int)) { + return $a[$const_int]; + } + elseif ($const_int == 99) { + return 'a legal non-alphanumeric character'; + } + elseif ($const_int == FALSE) { + return 'end of file'; + } + return sprintf('illegal character (%s)', $const_int); + } +} diff --git a/lib/querypath/src/QueryPath/CSS/Traverser.php b/lib/querypath/src/QueryPath/CSS/Traverser.php new file mode 100644 index 0000000..6d16d63 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/Traverser.php @@ -0,0 +1,38 @@ +<?php +/** + * @file + * The main Traverser interface. + */ + +namespace QueryPath\CSS; + +/** + * An object capable of walking (and searching) a datastructure. + */ +interface Traverser { + /** + * Process a CSS selector and find matches. + * + * This specifies a query to be run by the Traverser. A given + * Traverser may, in practice, delay the finding until some later time + * but must return the found results when getMatches() is called. + * + * @param string $selector + * A selector. Typically this is a CSS 3 Selector. + * @return \Traverser + * The Traverser that can return matches. + */ + public function find($selector); + /** + * Get the results of a find() operation. + * + * Return an array of matching items. + * + * @return array + * An array of matched values. The specific data type in the matches + * will differ depending on the data type searched, but in the core + * QueryPath implementation, this will be an array of DOMNode + * objects. 
+ */ + public function matches(); +} diff --git a/lib/querypath/src/QueryPath/DOMQuery.php b/lib/querypath/src/QueryPath/DOMQuery.php new file mode 100644 index 0000000..15ab893 --- /dev/null +++ b/lib/querypath/src/QueryPath/DOMQuery.php @@ -0,0 +1,3993 @@ +<?php +/** + * @file + * This houses the class formerly called QueryPath. + * + * As of QueryPath 3.0.0, the class was renamed QueryPath::DOMQuery. This + * was done for a few reasons: + * - The library has been refactored, and it made more sense to call the top + * level class QueryPath. This is not the top level class. + * - There have been requests for a JSONQuery class, which would be the + * natural complement of DOMQuery. + */ + +namespace QueryPath; + +use \QueryPath\CSS\QueryPathEventHandler; +use \QueryPath; +use \Masterminds\HTML5; + + +/** + * The DOMQuery object is the primary tool in this library. + * + * To create a new DOMQuery, use QueryPath::with() or qp() function. + * + * If you are new to these documents, start at the QueryPath.php page. + * There you will find a quick guide to the tools contained in this project. + * + * A note on serialization: DOMQuery uses DOM classes internally, and those + * do not serialize well at all. In addition, DOMQuery may contain many + * extensions, and there is no guarantee that extensions can serialize. The + * moral of the story: Don't serialize DOMQuery. + * + * @see qp() + * @see QueryPath.php + * @ingroup querypath_core + */ +class DOMQuery implements \QueryPath\Query, \IteratorAggregate, \Countable { + + /** + * Default parser flags. + * + * These are flags that will be used if no global or local flags override them. 
+ * @since 2.0 + */ + const DEFAULT_PARSER_FLAGS = NULL; + + const JS_CSS_ESCAPE_CDATA = '\\1'; + const JS_CSS_ESCAPE_CDATA_CCOMMENT = '/* \\1 */'; + const JS_CSS_ESCAPE_CDATA_DOUBLESLASH = '// \\1'; + const JS_CSS_ESCAPE_NONE = ''; + + //const IGNORE_ERRORS = 1544; //E_NOTICE | E_USER_WARNING | E_USER_NOTICE; + private $errTypes = 771; //E_ERROR; | E_USER_ERROR; + + /** + * The base DOMDocument. + */ + protected $document = NULL; + private $options = array( + 'parser_flags' => NULL, + 'omit_xml_declaration' => FALSE, + 'replace_entities' => FALSE, + 'exception_level' => 771, // E_ERROR | E_USER_ERROR | E_USER_WARNING | E_WARNING + 'ignore_parser_warnings' => FALSE, + 'escape_xhtml_js_css_sections' => self::JS_CSS_ESCAPE_CDATA_CCOMMENT, + ); + /** + * The array of matches. + */ + protected $matches = array(); + /** + * The last array of matches. + */ + protected $last = array(); // Last set of matches. + private $ext = array(); // Extensions array. + + /** + * The number of current matches. + * + * @see count() + */ + public $length = 0; + + /** + * Constructor. + * + * Typically, a new DOMQuery is created by QueryPath::with(), QueryPath::withHTML(), + * qp(), or htmlqp(). + * + * @param mixed $document + * A document-like object. + * @param string $string + * A CSS 3 Selector + * @param array $options + * An associative array of options. + * @see qp() + */ + public function __construct($document = NULL, $string = NULL, $options = array()) { + $string = trim($string); + $this->options = $options + Options::get() + $this->options; + + $parser_flags = isset($options['parser_flags']) ? $options['parser_flags'] : self::DEFAULT_PARSER_FLAGS; + if (!empty($this->options['ignore_parser_warnings'])) { + // Don't convert parser warnings into exceptions. + $this->errTypes = 257; //E_ERROR | E_USER_ERROR; + } + elseif (isset($this->options['exception_level'])) { + // Set the error level at which exceptions will be thrown. 
By default, + // QueryPath will throw exceptions for + // E_ERROR | E_USER_ERROR | E_WARNING | E_USER_WARNING. + $this->errTypes = $this->options['exception_level']; + } + + // Empty: Just create an empty QP. + if (empty($document)) { + $this->document = isset($this->options['encoding']) ? new \DOMDocument('1.0', $this->options['encoding']) : new \DOMDocument(); + $this->setMatches(new \SplObjectStorage()); + } + // Figure out if document is DOM, HTML/XML, or a filename + elseif (is_object($document)) { + + // This is the most frequent object type. + if ($document instanceof \SplObjectStorage) { + $this->matches = $document; + if ($document->count() != 0) { + $first = $this->getFirstMatch(); + if (!empty($first->ownerDocument)) { + $this->document = $first->ownerDocument; + } + } + } + elseif ($document instanceof DOMQuery) { + //$this->matches = $document->get(NULL, TRUE); + $this->setMatches($document->get(NULL, TRUE)); + if ($this->matches->count() > 0) + $this->document = $this->getFirstMatch()->ownerDocument; + } + elseif ($document instanceof \DOMDocument) { + $this->document = $document; + //$this->matches = $this->matches($document->documentElement); + $this->setMatches($document->documentElement); + } + elseif ($document instanceof \DOMNode) { + $this->document = $document->ownerDocument; + //$this->matches = array($document); + $this->setMatches($document); + } + elseif ($document instanceof \Masterminds\HTML5) { + $this->document = $document; + $this->setMatches($document->documentElement); + } + elseif ($document instanceof \SimpleXMLElement) { + $import = dom_import_simplexml($document); + $this->document = $import->ownerDocument; + //$this->matches = array($import); + $this->setMatches($import); + } + else { + throw new \QueryPath\Exception('Unsupported class type: ' . 
get_class($document)); + } + } + elseif (is_array($document)) { + //trigger_error('Detected deprecated array support', E_USER_NOTICE); + if (!empty($document) && $document[0] instanceof \DOMNode) { + $found = new \SplObjectStorage(); + foreach ($document as $item) $found->attach($item); + //$this->matches = $found; + $this->setMatches($found); + $this->document = $this->getFirstMatch()->ownerDocument; + } + } + elseif ($this->isXMLish($document)) { + // $document is a string with XML + $this->document = $this->parseXMLString($document); + $this->setMatches($this->document->documentElement); + } + else { + + // $document is a filename + $context = empty($options['context']) ? NULL : $options['context']; + $this->document = $this->parseXMLFile($document, $parser_flags, $context); + $this->setMatches($this->document->documentElement); + } + + // Globally set the output option. + if (isset($this->options['format_output']) && $this->options['format_output'] == FALSE) { + $this->document->formatOutput = FALSE; + } + else { + $this->document->formatOutput = TRUE; + } + + // Do a find if the second param was set. + if (isset($string) && strlen($string) > 0) { + // We don't issue a find because that creates a new DOMQuery. + //$this->find($string); + + $query = new \QueryPath\CSS\DOMTraverser($this->matches); + $query->find($string); + $this->setMatches($query->matches()); + } + } + + + /** + * Get the effective options for the current DOMQuery object. + * + * This returns an associative array of all of the options as set + * for the current DOMQuery object. This includes default options, + * options directly passed in via {@link qp()} or the constructor, + * an options set in the QueryPath::Options object. + * + * The order of merging options is this: + * - Options passed in using qp() are highest priority, and will + * override other options. + * - Options set with QueryPath::Options will override default options, + * but can be overridden by options passed into qp(). 
+ * - Default options will be used when no overrides are present. + * + * This function will return the options currently used, with the above option + * overriding having been calculated already. + * + * @return array + * An associative array of options, calculated from defaults and overridden + * options. + * @see qp() + * @see QueryPath::Options::set() + * @see QueryPath::Options::merge() + * @since 2.0 + */ + public function getOptions() { + return $this->options; + } + + /** + * Select the root element of the document. + * + * This sets the current match to the document's root element. For + * practical purposes, this is the same as: + * @code + * qp($someDoc)->find(':root'); + * @endcode + * However, since it doesn't invoke a parser, it has less overhead. It also + * works in cases where the QueryPath has been reduced to zero elements (a + * case that is not handled by find(':root') because there is no element + * whose root can be found). + * + * @param string $selector + * A selector. If this is supplied, QueryPath will navigate to the + * document root and then run the query. (Added in QueryPath 2.0 Beta 2) + * @return \QueryPath\DOMQuery + * The DOMQuery object, wrapping the root element (document element) + * for the current document. + */ + public function top($selector = NULL) { + //$this->setMatches($this->document->documentElement); + //return !empty($selector) ? $this->find($selector) : $this; + return $this->inst($this->document->documentElement, $selector, $this->options); + } + + /** + * Given a CSS Selector, find matching items. + * + * @param string $selector + * CSS 3 Selector + * @return \QueryPath\DOMQuery + * @see filter() + * @see is() + * @todo If a find() returns zero matches, then a subsequent find() will + * also return zero matches, even if that find has a selector like :root. 
+ * The reason for this is that the {@link QueryPathEventHandler} does + * not set the root of the document tree if it cannot find any elements + * from which to determine what the root is. The workaround is to use + * {@link top()} to select the root element again. + */ + public function find($selector) { + + //$query = new QueryPathEventHandler($this->matches); + $query = new \QueryPath\CSS\DOMTraverser($this->matches); + $query->find($selector); + //$this->setMatches($query->matches()); + //return $this; + return $this->inst($query->matches(), NULL , $this->options); + } + public function findInPlace($selector) { + $query = new \QueryPath\CSS\DOMTraverser($this->matches); + $query->find($selector); + $this->setMatches($query->matches()); + return $this; + } + + /** + * Execute an XPath query and store the results in the QueryPath. + * + * Most methods in this class support CSS 3 Selectors. Sometimes, though, + * XPath provides a finer-grained query language. Use this to execute + * XPath queries. + * + * Beware, though. DOMQuery works best on DOM Elements, but an XPath + * query can return other nodes, strings, and values. These may not work with + * other QueryPath functions (though you will be able to access the + * values with {@link get()}). + * + * @param string $query + * An XPath query. + * @param array $options + * Currently supported options are: + * - 'namespace_prefix': And XML namespace prefix to be used as the default. Used + * in conjunction with 'namespace_uri' + * - 'namespace_uri': The URI to be used as the default namespace URI. Used + * with 'namespace_prefix' + * @return \QueryPath\DOMQuery + * A DOMQuery object wrapping the results of the query. + * @see find() + * @author M Butcher + * @author Xavier Prud'homme + */ + public function xpath($query, $options = array()) { + $xpath = new \DOMXPath($this->document); + + // Register a default namespace. 
+ if (!empty($options['namespace_prefix']) && !empty($options['namespace_uri'])) { + $xpath->registerNamespace($options['namespace_prefix'], $options['namespace_uri']); + } + + $found = new \SplObjectStorage(); + foreach ($this->matches as $item) { + $nl = $xpath->query($query, $item); + if ($nl->length > 0) { + for ($i = 0; $i < $nl->length; ++$i) $found->attach($nl->item($i)); + } + } + return $this->inst($found, NULL, $this->options); + //$this->setMatches($found); + //return $this; + } + + /** + * Get the number of elements currently wrapped by this object. + * + * Note that there is no length property on this object. + * + * @return int + * Number of items in the object. + * @deprecated QueryPath now implements Countable, so use count(). + */ + public function size() { + return $this->matches->count(); + } + + /** + * Get the number of elements currently wrapped by this object. + * + * Since DOMQuery is Countable, the PHP count() function can also + * be used on a DOMQuery. + * + * @code + * <?php + * count(qp($xml, 'div')); + * ?> + * @endcode + * + * @return int + * The number of matches in the DOMQuery. + */ + public function count() { + return $this->matches->count(); + } + + /** + * Get one or all elements from this object. + * + * When called with no paramaters, this returns all objects wrapped by + * the DOMQuery. Typically, these are DOMElement objects (unless you have + * used map(), xpath(), or other methods that can select + * non-elements). + * + * When called with an index, it will return the item in the DOMQuery with + * that index number. + * + * Calling this method does not change the DOMQuery (e.g. it is + * non-destructive). + * + * You can use qp()->get() to iterate over all elements matched. You can + * also iterate over qp() itself (DOMQuery implementations must be Traversable). + * In the later case, though, each item + * will be wrapped in a DOMQuery object. 
To learn more about iterating + * in QueryPath, see {@link examples/techniques.php}. + * + * @param int $index + * If specified, then only this index value will be returned. If this + * index is out of bounds, a NULL will be returned. + * @param boolean $asObject + * If this is TRUE, an SplObjectStorage object will be returned + * instead of an array. This is the preferred method for extensions to use. + * @return mixed + * If an index is passed, one element will be returned. If no index is + * present, an array of all matches will be returned. + * @see eq() + * @see SplObjectStorage + */ + public function get($index = NULL, $asObject = FALSE) { + if (isset($index)) { + return ($this->size() > $index) ? $this->getNthMatch($index) : NULL; + } + // Retain support for legacy. + if (!$asObject) { + $matches = array(); + foreach ($this->matches as $m) $matches[] = $m; + return $matches; + } + return $this->matches; + } + + /** + * Get the namespace of the current element. + * + * If QP is currently pointed to a list of elements, this will get the + * namespace of the first element. + */ + public function ns() { + return $this->get(0)->namespaceURI; + } + + /** + * Get the DOMDocument that we currently work with. + * + * This returns the current DOMDocument. Any changes made to this document will be + * accessible to DOMQuery, as both will share access to the same object. + * + * @return DOMDocument + */ + public function document() { + return $this->document; + } + + /** + * On an XML document, load all XIncludes. + * + * @return \QueryPath\DOMQuery + */ + public function xinclude() { + $this->document->xinclude(); + return $this; + } + + /** + * Get all current elements wrapped in an array. + * Compatibility function for jQuery 1.4, but identical to calling {@link get()} + * with no parameters. + * + * @return array + * An array of DOMNodes (typically DOMElements). + */ + public function toArray() { + return $this->get(); + } + /** + * Get/set an attribute. 
+ * - If no parameters are specified, this returns an associative array of all + * name/value pairs. + * - If both $name and $value are set, then this will set the attribute name/value + * pair for all items in this object. + * - If $name is set, and is an array, then + * all attributes in the array will be set for all items in this object. + * - If $name is a string and is set, then the attribute value will be returned. + * + * When an attribute value is retrieved, only the attribute value of the FIRST + * match is returned. + * + * @param mixed $name + * The name of the attribute or an associative array of name/value pairs. + * @param string $value + * A value (used only when setting an individual property). + * @return mixed + * If this was a setter request, return the DOMQuery object. If this was + * an access request (getter), return the string value. + * @see removeAttr() + * @see tag() + * @see hasAttr() + * @see hasClass() + */ + public function attr($name = NULL, $value = NULL) { + + // Default case: Return all attributes as an assoc array. + if (is_null($name)) { + if ($this->matches->count() == 0) return NULL; + $ele = $this->getFirstMatch(); + $buffer = array(); + + // This does not appear to be part of the DOM + // spec. Nor is it documented. But it works. + foreach ($ele->attributes as $name => $attrNode) { + $buffer[$name] = $attrNode->value; + } + return $buffer; + } + + // multi-setter + if (is_array($name)) { + foreach ($name as $k => $v) { + foreach ($this->matches as $m) $m->setAttribute($k, $v); + } + return $this; + } + // setter + if (isset($value)) { + foreach ($this->matches as $m) $m->setAttribute($name, $value); + return $this; + } + + //getter + if ($this->matches->count() == 0) return NULL; + + // Special node type handler: + if ($name == 'nodeType') { + return $this->getFirstMatch()->nodeType; + } + + // Always return first match's attr. 
+ return $this->getFirstMatch()->getAttribute($name); + } + /** + * Check to see if the given attribute is present. + * + * This returns TRUE if <em>all</em> selected items have the attribute, or + * FALSE if at least one item does not have the attribute. + * + * @param string $attrName + * The attribute name. + * @return boolean + * TRUE if all matches have the attribute, FALSE otherwise. + * @since 2.0 + * @see attr() + * @see hasClass() + */ + public function hasAttr($attrName) { + foreach ($this->matches as $match) { + if (!$match->hasAttribute($attrName)) return FALSE; + } + return TRUE; + } + + /** + * Set/get a CSS value for the current element(s). + * This sets the CSS value for each element in the DOMQuery object. + * It does this by setting (or getting) the style attribute (without a namespace). + * + * For example, consider this code: + * @code + * <?php + * qp(HTML_STUB, 'body')->css('background-color','red')->html(); + * ?> + * @endcode + * This will return the following HTML: + * @code + * <body style="background-color: red"/> + * @endcode + * + * If no parameters are passed into this function, then the current style + * element will be returned unparsed. Example: + * @code + * <?php + * qp(HTML_STUB, 'body')->css('background-color','red')->css(); + * ?> + * @endcode + * This will return the following: + * @code + * background-color: red + * @endcode + * + * As of QueryPath 2.1, existing style attributes will be merged with new attributes. + * (In previous versions of QueryPath, a call to css() overwrite the existing style + * values). + * + * @param mixed $name + * If this is a string, it will be used as a CSS name. If it is an array, + * this will assume it is an array of name/value pairs of CSS rules. It will + * apply all rules to all elements in the set. + * @param string $value + * The value to set. This is only set if $name is a string. 
+ * @return \QueryPath\DOMQuery + */ + public function css($name = NULL, $value = '') { + if (empty($name)) { + return $this->attr('style'); + } + + // Get any existing CSS. + $css = array(); + foreach ($this->matches as $match) { + $style = $match->getAttribute('style'); + if (!empty($style)) { + // XXX: Is this sufficient? + $style_array = explode(';', $style); + foreach ($style_array as $item) { + $item = trim($item); + + // Skip empty attributes. + if (strlen($item) == 0) continue; + + list($css_att, $css_val) = explode(':',$item, 2); + $css[$css_att] = trim($css_val); + } + } + } + + if (is_array($name)) { + // Use array_merge instead of + to preserve order. + $css = array_merge($css, $name); + } + else { + $css[$name] = $value; + } + + // Collapse CSS into a string. + $format = '%s: %s;'; + $css_string = ''; + foreach ($css as $n => $v) { + $css_string .= sprintf($format, $n, trim($v)); + } + + $this->attr('style', $css_string); + return $this; + } + + /** + * Insert or retrieve a Data URL. + * + * When called with just $attr, it will fetch the result, attempt to decode it, and + * return an array with the MIME type and the application data. + * + * When called with both $attr and $data, it will inject the data into all selected elements + * So @code$qp->dataURL('src', file_get_contents('my.png'), 'image/png')@endcode will inject + * the given PNG image into the selected elements. + * + * The current implementation only knows how to encode and decode Base 64 data. + * + * Note that this is known *not* to work on IE 6, but should render fine in other browsers. + * + * @param string $attr + * The name of the attribute. + * @param mixed $data + * The contents to inject as the data. The value can be any one of the following: + * - A URL: If this is given, then the subsystem will read the content from that URL. THIS + * MUST BE A FULL URL, not a relative path. + * - A string of data: If this is given, then the subsystem will encode the string. 
+ * - A stream or file handle: If this is given, the stream's contents will be encoded + * and inserted as data. + * (Note that we make the assumption here that you would never want to set data to be + * a URL. If this is an incorrect assumption, file a bug.) + * @param string $mime + * The MIME type of the document. + * @param resource $context + * A valid context. Use this only if you need to pass a stream context. This is only necessary + * if $data is a URL. (See {@link stream_context_create()}). + * @return \QueryPath\DOMQuery|string + * If this is called as a setter, this will return a DOMQuery object. Otherwise, it + * will attempt to fetch data out of the attribute and return that. + * @see http://en.wikipedia.org/wiki/Data:_URL + * @see attr() + * @since 2.1 + */ + public function dataURL($attr, $data = NULL, $mime = 'application/octet-stream', $context = NULL) { + if (is_null($data)) { + // Attempt to fetch the data + $data = $this->attr($attr); + if (empty($data) || is_array($data) || strpos($data, 'data:') !== 0) { + return; + } + + // So 1 and 2 should be MIME types, and 3 should be the base64-encoded data. + $regex = '/^data:([a-zA-Z0-9]+)\/([a-zA-Z0-9]+);base64,(.*)$/'; + $matches = array(); + preg_match($regex, $data, $matches); + + if (!empty($matches)) { + $result = array( + 'mime' => $matches[1] . '/' . $matches[2], + 'data' => base64_decode($matches[3]), + ); + return $result; + } + } + else { + $attVal = \QueryPath::encodeDataURL($data, $mime, $context); + return $this->attr($attr, $attVal); + } + } + + /** + * Remove the named attribute from all elements in the current DOMQuery. + * + * This will remove any attribute with the given name. It will do this on each + * item currently wrapped by DOMQuery. + * + * As is the case in jQuery, this operation is not considered destructive. + * + * @param string $name + * Name of the parameter to remove. + * @return \QueryPath\DOMQuery + * The DOMQuery object with the same elements. 
+ * @see attr() + */ + public function removeAttr($name) { + foreach ($this->matches as $m) { + //if ($m->hasAttribute($name)) + $m->removeAttribute($name); + } + return $this; + } + /** + * Reduce the matched set to just one. + * + * This will take a matched set and reduce it to just one item -- the item + * at the index specified. This is a destructive operation, and can be undone + * with {@link end()}. + * + * @param $index + * The index of the element to keep. The rest will be + * discarded. + * @return \QueryPath\DOMQuery + * @see get() + * @see is() + * @see end() + */ + public function eq($index) { + return $this->inst($this->getNthMatch($index), NULL, $this->options); + // XXX: Might there be a more efficient way of doing this? + //$this->setMatches($this->getNthMatch($index)); + //return $this; + } + /** + * Given a selector, this checks to see if the current set has one or more matches. + * + * Unlike jQuery's version, this supports full selectors (not just simple ones). + * + * @param string $selector + * The selector to search for. As of QueryPath 2.1.1, this also supports passing a + * DOMNode object. + * @return boolean + * TRUE if one or more elements match. FALSE if no match is found. + * @see get() + * @see eq() + */ + public function is($selector) { + + if (is_object($selector)) { + if ($selector instanceof \DOMNode) { + return count($this->matches) == 1 && $selector->isSameNode($this->get(0)); + } + elseif ($selector instanceof \Traversable) { + if (count($selector) != count($this->matches)) { + return FALSE; + } + // Without $seen, there is an edge case here if $selector contains the same object + // more than once, but the counts are equal. For example, [a, a, a, a] will + // pass an is() on [a, b, c, d]. We use the $seen SPLOS to prevent this. 
+ $seen = new \SplObjectStorage(); + foreach ($selector as $item) { + if (!$this->matches->contains($item) || $seen->contains($item)) { + return FALSE; + } + $seen->attach($item); + } + return TRUE; + } + throw new \QueryPath\Exception('Cannot compare an object to a DOMQuery.'); + return FALSE; + } + + // Testing based on Issue #70. + //fprintf(STDOUT, __FUNCTION__ .' found %d', $this->find($selector)->count()); + return $this->branch($selector)->count() > 0; + + // Old version: + //foreach ($this->matches as $m) { + //$q = new \QueryPath\CSS\QueryPathEventHandler($m); + //if ($q->find($selector)->getMatches()->count()) { + //return TRUE; + //} + //} + //return FALSE; + } + /** + * Filter a list down to only elements that match the selector. + * Use this, for example, to find all elements with a class, or with + * certain children. + * + * @param string $selector + * The selector to use as a filter. + * @return \QueryPath\DOMQuery + * The DOMQuery with non-matching items filtered out. + * @see filterLambda() + * @see filterCallback() + * @see map() + * @see find() + * @see is() + */ + public function filter($selector) { + + $found = new \SplObjectStorage(); + $tmp = new \SplObjectStorage(); + foreach ($this->matches as $m) { + $tmp->attach($m); + // Seems like this should be right... but it fails unit + // tests. Need to compare to jQuery. + // $query = new \QueryPath\CSS\DOMTraverser($tmp, TRUE, $m); + $query = new \QueryPath\CSS\DOMTraverser($tmp); + $query->find($selector); + if (count($query->matches())) { + $found->attach($m); + } + $tmp->detach($m); + } + return $this->inst($found, NULL, $this->options); + } + + /** + * Sort the contents of the QueryPath object. + * + * By default, this does not change the order of the elements in the + * DOM. Instead, it just sorts the internal list. However, if TRUE + * is passed in as the second parameter then QueryPath will re-order + * the DOM, too. 
+ * + * @attention + * DOM re-ordering is done by finding the location of the original first + * item in the list, and then placing the sorted list at that location. + * + * The argument $compartor is a callback, such as a function name or a + * closure. The callback receives two DOMNode objects, which you can use + * as DOMNodes, or wrap in QueryPath objects. + * + * A simple callback: + * @code + * <?php + * $comp = function (\DOMNode $a, \DOMNode $b) { + * if ($a->textContent == $b->textContent) { + * return 0; + * } + * return $a->textContent > $b->textContent ? 1 : -1; + * }; + * $qp = QueryPath::with($xml, $selector)->sort($comp); + * ?> + * @endcode + * + * The above sorts the matches into lexical order using the text of each node. + * If you would prefer to work with QueryPath objects instead of DOMNode + * objects, you may prefer something like this: + * + * @code + * <?php + * $comp = function (\DOMNode $a, \DOMNode $b) { + * $qpa = qp($a); + * $qpb = qp($b); + * + * if ($qpa->text() == $qpb->text()) { + * return 0; + * } + * return $qpa->text()> $qpb->text()? 1 : -1; + * }; + * + * $qp = QueryPath::with($xml, $selector)->sort($comp); + * ?> + * @endcode + * + * @param callback $comparator + * A callback. This will be called during sorting to compare two DOMNode + * objects. + * @param boolean $modifyDOM + * If this is TRUE, the sorted results will be inserted back into + * the DOM at the position of the original first element. + * @return \QueryPath\DOMQuery + * This object. + */ + public function sort($comparator, $modifyDOM = FALSE) { + // Sort as an array. + $list = iterator_to_array($this->matches); + + if (empty($list)) { + return $this; + } + + $oldFirst = $list[0]; + + usort($list, $comparator); + + // Copy back into SplObjectStorage. + $found = new \SplObjectStorage(); + foreach ($list as $node) { + $found->attach($node); + } + //$this->setMatches($found); + + + // Do DOM modifications only if necessary. 
+ if ($modifyDOM) { + $placeholder = $oldFirst->ownerDocument->createElement('_PLACEHOLDER_'); + $placeholder = $oldFirst->parentNode->insertBefore($placeholder, $oldFirst); + $len = count($list); + for ($i = 0; $i < $len; ++$i) { + $node = $list[$i]; + $node = $node->parentNode->removeChild($node); + $placeholder->parentNode->insertBefore($node, $placeholder); + } + $placeholder->parentNode->removeChild($placeholder); + } + + return $this->inst($found, NULL, $this->options); + } + /** + * Filter based on a lambda function. + * + * The function string will be executed as if it were the body of a + * function. It is passed two arguments: + * - $index: The index of the item. + * - $item: The current Element. + * If the function returns boolean FALSE, the item will be removed from + * the list of elements. Otherwise it will be kept. + * + * Example: + * @code + * qp('li')->filterLambda('qp($item)->attr("id") == "test"'); + * @endcode + * + * The above would filter down the list to only an item whose ID is + * 'text'. + * + * @param string $fn + * Inline lambda function in a string. + * @return \QueryPath\DOMQuery + * @see filter() + * @see map() + * @see mapLambda() + * @see filterCallback() + */ + public function filterLambda($fn) { + $function = create_function('$index, $item', $fn); + $found = new \SplObjectStorage(); + $i = 0; + foreach ($this->matches as $item) + if ($function($i++, $item) !== FALSE) $found->attach($item); + + return $this->inst($found, NULL, $this->options); + } + + /** + * Use regular expressions to filter based on the text content of matched elements. + * + * Only items that match the given regular expression will be kept. All others will + * be removed. + * + * The regular expression is run against the <i>text content</i> (the PCDATA) of the + * elements. This is a way of filtering elements based on their content. 
+ * + * Example: + * @code + * <?xml version="1.0"?> + * <div>Hello <i>World</i></div> + * @endcode + * + * @code + * <?php + * // This will be 1. + * qp($xml, 'div')->filterPreg('/World/')->size(); + * ?> + * @endcode + * + * The return value above will be 1 because the text content of @codeqp($xml, 'div')@endcode is + * @codeHello World@endcode. + * + * Compare this to the behavior of the <em>:contains()</em> CSS3 pseudo-class. + * + * @param string $regex + * A regular expression. + * @return \QueryPath\DOMQuery + * @see filter() + * @see filterCallback() + * @see preg_match() + */ + public function filterPreg($regex) { + + $found = new \SplObjectStorage(); + + foreach ($this->matches as $item) { + if (preg_match($regex, $item->textContent) > 0) { + $found->attach($item); + } + } + return $this->inst($found, NULL, $this->options); + } + /** + * Filter based on a callback function. + * + * A callback may be any of the following: + * - a function: 'my_func'. + * - an object/method combo: $obj, 'myMethod' + * - a class/method combo: 'MyClass', 'myMethod' + * Note that classes are passed in strings. Objects are not. + * + * Each callback is passed to arguments: + * - $index: The index position of the object in the array. + * - $item: The item to be operated upon. + * + * If the callback function returns FALSE, the item will be removed from the + * set of matches. Otherwise the item will be considered a match and left alone. + * + * @param callback $callback. + * A callback either as a string (function) or an array (object, method OR + * classname, method). + * @return \QueryPath\DOMQuery + * Query path object augmented according to the function. 
+ * @see filter() + * @see filterLambda() + * @see map() + * @see is() + * @see find() + */ + public function filterCallback($callback) { + $found = new \SplObjectStorage(); + $i = 0; + if (is_callable($callback)) { + foreach($this->matches as $item) + if (call_user_func($callback, $i++, $item) !== FALSE) $found->attach($item); + } + else { + throw new \QueryPath\Exception('The specified callback is not callable.'); + } + return $this->inst($found, NULL, $this->options); + } + /** + * Filter a list to contain only items that do NOT match. + * + * @param string $selector + * A selector to use as a negation filter. If the filter is matched, the + * element will be removed from the list. + * @return \QueryPath\DOMQuery + * The DOMQuery object with matching items filtered out. + * @see find() + */ + public function not($selector) { + $found = new \SplObjectStorage(); + if ($selector instanceof \DOMElement) { + foreach ($this->matches as $m) if ($m !== $selector) $found->attach($m); + } + elseif (is_array($selector)) { + foreach ($this->matches as $m) { + if (!in_array($m, $selector, TRUE)) $found->attach($m); + } + } + elseif ($selector instanceof \SplObjectStorage) { + foreach ($this->matches as $m) if ($selector->contains($m)) $found->attach($m); + } + else { + foreach ($this->matches as $m) if (!QueryPath::with($m, NULL, $this->options)->is($selector)) $found->attach($m); + } + return $this->inst($found, NULL, $this->options); + } + /** + * Get an item's index. + * + * Given a DOMElement, get the index from the matches. This is the + * converse of {@link get()}. + * + * @param DOMElement $subject + * The item to match. + * + * @return mixed + * The index as an integer (if found), or boolean FALSE. Since 0 is a + * valid index, you should use strong equality (===) to test.. 
+ * @see get() + * @see is() + */ + public function index($subject) { + + $i = 0; + foreach ($this->matches as $m) { + if ($m === $subject) { + return $i; + } + ++$i; + } + return FALSE; + } + /** + * Run a function on each item in a set. + * + * The mapping callback can return anything. Whatever it returns will be + * stored as a match in the set, though. This means that afer a map call, + * there is no guarantee that the elements in the set will behave correctly + * with other DOMQuery functions. + * + * Callback rules: + * - If the callback returns NULL, the item will be removed from the array. + * - If the callback returns an array, the entire array will be stored in + * the results. + * - If the callback returns anything else, it will be appended to the array + * of matches. + * + * @param callback $callback + * The function or callback to use. The callback will be passed two params: + * - $index: The index position in the list of items wrapped by this object. + * - $item: The current item. + * + * @return \QueryPath\DOMQuery + * The DOMQuery object wrapping a list of whatever values were returned + * by each run of the callback. 
+ * + * @see DOMQuery::get() + * @see filter() + * @see find() + */ + public function map($callback) { + $found = new \SplObjectStorage(); + + if (is_callable($callback)) { + $i = 0; + foreach ($this->matches as $item) { + $c = call_user_func($callback, $i, $item); + if (isset($c)) { + if (is_array($c) || $c instanceof \Iterable) { + foreach ($c as $retval) { + if (!is_object($retval)) { + $tmp = new \stdClass(); + $tmp->textContent = $retval; + $retval = $tmp; + } + $found->attach($retval); + } + } + else { + if (!is_object($c)) { + $tmp = new \stdClass(); + $tmp->textContent = $c; + $c = $tmp; + } + $found->attach($c); + } + } + ++$i; + } + } + else { + throw new \QueryPath\Exception('Callback is not callable.'); + } + return $this->inst($found, NULL, $this->options); + } + /** + * Narrow the items in this object down to only a slice of the starting items. + * + * @param integer $start + * Where in the list of matches to begin the slice. + * @param integer $length + * The number of items to include in the slice. If nothing is specified, the + * all remaining matches (from $start onward) will be included in the sliced + * list. + * @return \QueryPath\DOMQuery + * @see array_slice() + */ + public function slice($start, $length = 0) { + $end = $length; + $found = new \SplObjectStorage(); + if ($start >= $this->size()) { + return $this->inst($found, NULL, $this->options); + } + + $i = $j = 0; + foreach ($this->matches as $m) { + if ($i >= $start) { + if ($end > 0 && $j >= $end) { + break; + } + $found->attach($m); + ++$j; + } + ++$i; + } + + return $this->inst($found, NULL, $this->options); + } + /** + * Run a callback on each item in the list of items. + * + * Rules of the callback: + * - A callback is passed two variables: $index and $item. (There is no + * special treatment of $this, as there is in jQuery.) + * - You will want to pass $item by reference if it is not an + * object (DOMNodes are all objects). 
+ * - A callback that returns FALSE will stop execution of the each() loop. This + * works like break in a standard loop. + * - A TRUE return value from the callback is analogous to a continue statement. + * - All other return values are ignored. + * + * @param callback $callback + * The callback to run. + * @return \QueryPath\DOMQuery + * The DOMQuery. + * @see eachLambda() + * @see filter() + * @see map() + */ + public function each($callback) { + if (is_callable($callback)) { + $i = 0; + foreach ($this->matches as $item) { + if (call_user_func($callback, $i, $item) === FALSE) return $this; + ++$i; + } + } + else { + throw new \QueryPath\Exception('Callback is not callable.'); + } + return $this; + } + /** + * An each() iterator that takes a lambda function. + * + * @deprecated + * Since PHP 5.3 supports anonymous functions -- REAL Lambdas -- this + * method is not necessary and should be avoided. + * @param string $lambda + * The lambda function. This will be passed ($index, &$item). + * @return \QueryPath\DOMQuery + * The DOMQuery object. + * @see each() + * @see filterLambda() + * @see filterCallback() + * @see map() + */ + public function eachLambda($lambda) { + $index = 0; + foreach ($this->matches as $item) { + $fn = create_function('$index, &$item', $lambda); + if ($fn($index, $item) === FALSE) return $this; + ++$index; + } + return $this; + } + /** + * Insert the given markup as the last child. + * + * The markup will be inserted into each match in the set. + * + * The same element cannot be inserted multiple times into a document. DOM + * documents do not allow a single object to be inserted multiple times + * into the DOM. To insert the same XML repeatedly, we must first clone + * the object. This has one practical implication: Once you have inserted + * an element into the object, you cannot further manipulate the original + * element and expect the changes to be replciated in the appended object. 
+ * (They are not the same -- there is no shared reference.) Instead, you + * will need to retrieve the appended object and operate on that. + * + * @param mixed $data + * This can be either a string (the usual case), or a DOM Element. + * @return \QueryPath\DOMQuery + * The DOMQuery object. + * @see appendTo() + * @see prepend() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function append($data) { + $data = $this->prepareInsert($data); + if (isset($data)) { + if (empty($this->document->documentElement) && $this->matches->count() == 0) { + // Then we assume we are writing to the doc root + $this->document->appendChild($data); + $found = new \SplObjectStorage(); + $found->attach($this->document->documentElement); + $this->setMatches($found); + } + else { + // You can only append in item once. So in cases where we + // need to append multiple times, we have to clone the node. + foreach ($this->matches as $m) { + // DOMDocumentFragments are even more troublesome, as they don't + // always clone correctly. So we have to clone their children. + if ($data instanceof \DOMDocumentFragment) { + foreach ($data->childNodes as $n) + $m->appendChild($n->cloneNode(TRUE)); + } + else { + // Otherwise a standard clone will do. + $m->appendChild($data->cloneNode(TRUE)); + } + + } + } + + } + return $this; + } + /** + * Append the current elements to the destination passed into the function. + * + * This cycles through all of the current matches and appends them to + * the context given in $destination. If a selector is provided then the + * $destination is queried (using that selector) prior to the data being + * appended. The data is then appended to the found items. + * + * @param DOMQuery $dest + * A DOMQuery object that will be appended to. + * @return \QueryPath\DOMQuery + * The original DOMQuery, unaltered. Only the destination DOMQuery will + * be modified. 
+ * @see append() + * @see prependTo() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function appendTo(DOMQuery $dest) { + foreach ($this->matches as $m) $dest->append($m); + return $this; + } + /** + * Insert the given markup as the first child. + * + * The markup will be inserted into each match in the set. + * + * @param mixed $data + * This can be either a string (the usual case), or a DOM Element. + * @return \QueryPath\DOMQuery + * @see append() + * @see before() + * @see after() + * @see prependTo() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function prepend($data) { + $data = $this->prepareInsert($data); + if (isset($data)) { + foreach ($this->matches as $m) { + $ins = $data->cloneNode(TRUE); + if ($m->hasChildNodes()) + $m->insertBefore($ins, $m->childNodes->item(0)); + else + $m->appendChild($ins); + } + } + return $this; + } + /** + * Take all nodes in the current object and prepend them to the children nodes of + * each matched node in the passed-in DOMQuery object. + * + * This will iterate through each item in the current DOMQuery object and + * add each item to the beginning of the children of each element in the + * passed-in DOMQuery object. + * + * @see insertBefore() + * @see insertAfter() + * @see prepend() + * @see appendTo() + * @param DOMQuery $dest + * The destination DOMQuery object. + * @return \QueryPath\DOMQuery + * The original DOMQuery, unmodified. NOT the destination DOMQuery. + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function prependTo(DOMQuery $dest) { + foreach ($this->matches as $m) $dest->prepend($m); + return $this; + } + + /** + * Insert the given data before each element in the current set of matches. + * + * This will take the give data (XML or HTML) and put it before each of the items that + * the DOMQuery object currently contains. Contrast this with after(). 
+ * + * @param mixed $data + * The data to be inserted. This can be XML in a string, a DomFragment, a DOMElement, + * or the other usual suspects. (See {@link qp()}). + * @return \QueryPath\DOMQuery + * Returns the DOMQuery with the new modifications. The list of elements currently + * selected will remain the same. + * @see insertBefore() + * @see after() + * @see append() + * @see prepend() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function before($data) { + $data = $this->prepareInsert($data); + foreach ($this->matches as $m) { + $ins = $data->cloneNode(TRUE); + $m->parentNode->insertBefore($ins, $m); + } + + return $this; + } + /** + * Insert the current elements into the destination document. + * The items are inserted before each element in the given DOMQuery document. + * That is, they will be siblings with the current elements. + * + * @param DOMQuery $dest + * Destination DOMQuery document. + * @return \QueryPath\DOMQuery + * The current DOMQuery object, unaltered. Only the destination DOMQuery + * object is altered. + * @see before() + * @see insertAfter() + * @see appendTo() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function insertBefore(DOMQuery $dest) { + foreach ($this->matches as $m) $dest->before($m); + return $this; + } + /** + * Insert the contents of the current DOMQuery after the nodes in the + * destination DOMQuery object. + * + * @param DOMQuery $dest + * Destination object where the current elements will be deposited. + * @return \QueryPath\DOMQuery + * The present DOMQuery, unaltered. Only the destination object is altered. + * @see after() + * @see insertBefore() + * @see append() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. 
*/
  public function insertAfter(DOMQuery $dest) {
    // Delegate to the destination's after() once per matched node.
    foreach ($this->matches as $m) {
      $dest->after($m);
    }
    return $this;
  }
  /**
   * Insert the given data after each element in the current DOMQuery object.
   *
   * This inserts the element as a peer to the currently matched elements.
   * Contrast this with {@link append()}, which inserts the data as children
   * of matched elements.
   *
   * @param mixed $data
   *   The data to be appended.
   * @return \QueryPath\DOMQuery
   *   The DOMQuery object (with the items inserted).
   * @see before()
   * @see append()
   * @throws QueryPath::Exception
   *   Thrown if $data is an unsupported object type.
   */
  public function after($data) {
    $data = $this->prepareInsert($data);

    // Check the *prepared* value: prepareInsert() returns nothing both for
    // empty input and for a DOMQuery that wraps no nodes. Testing only the raw
    // argument let the second case through to cloneNode() on NULL.
    if (!isset($data)) {
      return $this;
    }

    foreach ($this->matches as $m) {
      $ins = $data->cloneNode(TRUE);
      if (isset($m->nextSibling)) {
        $m->parentNode->insertBefore($ins, $m->nextSibling);
      }
      else {
        // Last child of its parent: append instead.
        $m->parentNode->appendChild($ins);
      }
    }
    return $this;
  }
  /**
   * Replace the existing element(s) in the list with a new one.
   *
   * If the replacement prepares to nothing (for example an empty string),
   * the matched elements are simply removed.
   *
   * @param mixed $new
   *   A DOMElement or XML in a string. This will replace all elements
   *   currently wrapped in the DOMQuery object.
   * @return \QueryPath\DOMQuery
   *   The DOMQuery object wrapping <b>the items that were removed</b>.
   *   This remains consistent with the jQuery API.
   * @see append()
   * @see prepend()
   * @see before()
   * @see after()
   * @see remove()
   * @see replaceAll()
   */
  public function replaceWith($new) {
    $data = $this->prepareInsert($new);
    $found = new \SplObjectStorage();
    foreach ($this->matches as $m) {
      $parent = $m->parentNode;
      // Only insert when there is something to insert; previously a NULL
      // replacement caused a fatal error on cloneNode().
      if (isset($data)) {
        $parent->insertBefore($data->cloneNode(TRUE), $m);
      }
      // removeChild() hands back the detached node, which is what we return.
      $found->attach($parent->removeChild($m));
    }
    return $this->inst($found, NULL, $this->options);
  }
  /**
   * Remove the parent element from the selected node or nodes.
*
   * This takes the given list of nodes and "unwraps" them, moving them out of their parent
   * node, and then deleting the parent node.
   *
   * For example, consider this:
   *
   * @code
   *   <root><wrapper><content/></wrapper></root>
   * @endcode
   *
   * Now we can run this code:
   * @code
   *   qp($xml, 'content')->unwrap();
   * @endcode
   *
   * This will result in:
   *
   * @code
   *   <root><content/></root>
   * @endcode
   * This is the opposite of wrap().
   *
   * <b>The root element cannot be unwrapped.</b> It has no parents.
   * If you attempt to use unwrap on a root element, this will throw a
   * QueryPath::Exception. (You can, however, "Unwrap" a child that is
   * a direct descendant of the root element. This will remove the root
   * element, and replace the child as the root element. Be careful, though.
   * You cannot set more than one child as a root element.)
   *
   * @return \QueryPath\DOMQuery
   *   The DOMQuery object, with the same element(s) selected.
   * @throws QueryPath::Exception
   *   An exception is thrown if one attempts to unwrap a root element.
   * @see wrap()
   * @since 2.1
   * @author mbutcher
   */
  public function unwrap() {
    // Pass one lifts every match out of its wrapper and remembers the wrapper.
    // Wrappers are only deleted in pass two, so that two matches sharing a
    // wrapper are both moved before that wrapper disappears.
    $wrappers = new \SplObjectStorage();
    foreach ($this->matches as $node) {
      $root = $node->ownerDocument->documentElement;
      if ($node->isSameNode($root)) {
        // The root has no wrapper to strip.
        throw new \QueryPath\Exception('Cannot unwrap the root element.');
      }

      // Detach the node, then re-insert it just in front of its old wrapper.
      $wrapper = $node->parentNode;
      $detached = $wrapper->removeChild($node);
      $wrapper->parentNode->insertBefore($detached, $wrapper);
      $wrappers->attach($wrapper);
    }

    // Pass two: drop each wrapper that was collected above.
    foreach ($wrappers as $wrapper) {
      $wrapper->parentNode->removeChild($wrapper);
    }

    return $this;
  }
  /**
   * Wrap each element inside of the given markup.
   *
   * Markup is usually a string, but it can also be a DOMNode, a document
   * fragment, a SimpleXMLElement, or another DOMNode object (in which case
   * the first item in the list will be used.)
   *
   * @param mixed $markup
   *   Markup that will wrap each element in the current list.
   * @return \QueryPath\DOMQuery
   *   The DOMQuery object with the wrapping changes made.
   * @see wrapAll()
   * @see wrapInner()
   */
  public function wrap($markup) {
    $data = $this->prepareInsert($markup);

    // Empty markup means there is nothing to wrap with.
    if (empty($data)) {
      return $this;
    }

    foreach ($this->matches as $m) {
      // A fragment contributes only its first child as the wrapper.
      $source = ($data instanceof \DOMDocumentFragment) ? $data->firstChild : $data;
      $copy = $source->cloneNode(true);

      // XXX: Should be able to avoid doing this over and over.
      // FIXME: Does this need a different data structure?
      $bottom = $copy;
      if ($copy->hasChildNodes()) {
        $deepest = $this->deepestNode($copy);
        $bottom = $deepest[0];
      }

      // Place the wrapper where the match was, then move the match to the
      // innermost node of the wrapper.
      $parent = $m->parentNode;
      $parent->insertBefore($copy, $m);
      $m = $parent->removeChild($m);
      $bottom->appendChild($m);
    }
    return $this;
  }
  /**
   * Wrap all elements inside of the given markup.
   *
   * So all elements will be grouped together under this single marked up
   * item. This works by first determining the parent element of the first item
   * in the list. It then moves all of the matching elements under the wrapper
   * and inserts the wrapper where that first element was found. (This is in
   * accordance with the way jQuery works.)
   *
   * Markup is usually XML in a string, but it can also be a DOMNode, a document
   * fragment, a SimpleXMLElement, or another DOMNode object (in which case
   * the first item in the list will be used.)
*
   * @param string $markup
   *   Markup that will wrap all elements in the current list.
   * @return \QueryPath\DOMQuery
   *   The DOMQuery object with the wrapping changes made.
   * @see wrap()
   * @see wrapInner()
   */
  public function wrapAll($markup) {
    // With nothing selected there is nothing to wrap. Return $this (not NULL)
    // so that a fluent call chain keeps working on an empty set.
    if ($this->matches->count() == 0) {
      return $this;
    }

    $data = $this->prepareInsert($markup);

    if (empty($data)) {
      return $this;
    }

    // A fragment contributes only its first child as the wrapper.
    if ($data instanceof \DOMDocumentFragment) {
      $data = $data->firstChild->cloneNode(true);
    }
    else {
      $data = $data->cloneNode(true);
    }

    if ($data->hasChildNodes()) {
      $deepest = $this->deepestNode($data);
      // FIXME: Does this need fixing?
      $bottom = $deepest[0];
    }
    else {
      $bottom = $data;
    }

    // The single wrapper goes where the first match currently sits; every
    // match is then moved to the innermost node of that wrapper.
    $first = $this->getFirstMatch();
    $parent = $first->parentNode;
    $parent->insertBefore($data, $first);
    foreach ($this->matches as $m) {
      $bottom->appendChild($m->parentNode->removeChild($m));
    }
    return $this;
  }
  /**
   * Wrap the child elements of each item in the list with the given markup.
   *
   * Markup is usually a string, but it can also be a DOMNode, a document
   * fragment, a SimpleXMLElement, or another DOMNode object (in which case
   * the first item in the list will be used.)
   *
   * @param string $markup
   *   Markup that will wrap children of each element in the current list.
   * @return \QueryPath\DOMQuery
   *   The DOMQuery object with the wrapping changes made.
   * @see wrap()
   * @see wrapAll()
   */
  public function wrapInner($markup) {
    $data = $this->prepareInsert($markup);

    // No data? Short circuit.
    if (empty($data)) {
      return $this;
    }

    foreach ($this->matches as $m) {
      if ($data instanceof \DOMDocumentFragment) {
        $wrapper = $data->firstChild->cloneNode(true);
      }
      else {
        $wrapper = $data->cloneNode(true);
      }

      if ($wrapper->hasChildNodes()) {
        $deepest = $this->deepestNode($wrapper);
        // FIXME: ???
        $bottom = $deepest[0];
      }
      else {
        $bottom = $wrapper;
      }

      // Move every existing child into the innermost node of the wrapper...
      while ($m->firstChild) {
        $kid = $m->removeChild($m->firstChild);
        $bottom->appendChild($kid);
      }

      // ...and make the wrapper the match's only child.
      $m->appendChild($wrapper);
    }
    return $this;
  }
  /**
   * Reduce the set of matches to the deepest child node in the tree.
   *
   * This loops through the matches and looks for the deepest child node of all of
   * the matches. "Deepest", here, is relative to the nodes in the list. It is
   * calculated as the distance from the starting node to the most distant child
   * node. In other words, it is not necessarily the farthest node from the root
   * element, but the farthest node from the matched element.
   *
   * In the case where there are multiple nodes at the same depth, all of the
   * nodes at that depth will be included.
   *
   * @return \QueryPath\DOMQuery
   *   The DOMQuery wrapping the deepest node(s).
   */
  public function deepest() {
    $deepest = 0;
    $winner = new \SplObjectStorage();
    foreach ($this->matches as $m) {
      // deepestNode() reports the depth it reached through this reference.
      $local_deepest = 0;
      $local_ele = $this->deepestNode($m, 0, NULL, $local_deepest);

      if ($local_deepest > $deepest) {
        // Strictly deeper than anything so far: start a fresh winner set.
        $winner = new \SplObjectStorage();
        foreach ($local_ele as $lele) {
          $winner->attach($lele);
        }
        $deepest = $local_deepest;
      }
      elseif ($local_deepest == $deepest) {
        // Same depth: augment the current winners.
        foreach ($local_ele as $lele) {
          $winner->attach($lele);
        }
      }
    }
    return $this->inst($winner, NULL, $this->options);
  }

  /**
   * A depth-checking function. Typically, it only needs to be
   * invoked with the first parameter. The rest are used for recursion.
   * @see deepest();
   * @param DOMNode $ele
   *   The element.
   * @param int $depth
   *   The depth gauge.
   * @param mixed $current
   *   The current set.
   * @param int $deepest
   *   A reference to the greatest depth recorded so far.
   * @return array
   *   Returns an array of DOM nodes.
*/
  protected function deepestNode(\DOMNode $ele, $depth = 0, $current = NULL, &$deepest = NULL) {
    // FIXME: Should this use SplObjectStorage?
    if (!isset($current)) $current = array($ele);
    if (!isset($deepest)) $deepest = $depth;
    if ($ele->hasChildNodes()) {
      // Recurse into element children only.
      // NOTE(review): a node whose children are all non-elements (e.g. only
      // text) is neither recursed into nor recorded as a leaf -- confirm this
      // is intended.
      foreach ($ele->childNodes as $child) {
        if ($child->nodeType === XML_ELEMENT_NODE) {
          $current = $this->deepestNode($child, $depth + 1, $current, $deepest);
        }
      }
    }
    elseif ($depth > $deepest) {
      // A childless node deeper than anything seen: it replaces the set.
      $current = array($ele);
      $deepest = $depth;
    }
    elseif ($depth === $deepest) {
      // A childless node at the current maximum depth: it joins the set.
      $current[] = $ele;
    }
    return $current;
  }

  /**
   * Prepare an item for insertion into a DOM.
   *
   * This handles a variety of boilerplate tasks that need doing before an
   * indeterminate object can be inserted into a DOM tree.
   * - If item is a string, this is converted into a document fragment and returned.
   * - If item is a DOMQuery, then all items are retrieved and converted into
   *   a document fragment and returned.
   * - If the item is a DOMNode, it is imported into the current DOM if necessary.
   * - If the item is a SimpleXMLElement, it is converted into a DOM node and then
   *   imported.
   *
   * Empty input, and a DOMQuery wrapping no nodes, produce no return value
   * (NULL); callers must check for that before using the result.
   *
   * @param mixed $item
   *   Item to prepare for insert.
   * @return mixed
   *   Returns the prepared item.
   * @throws QueryPath::Exception
   *   Thrown if the object passed in is not of a supported object type.
   */
  protected function prepareInsert($item) {
    if(empty($item)) {
      return;
    }
    elseif (is_string($item)) {
      // If configured to do so, replace all entities.
      if ($this->options['replace_entities']) {
        $item = \QueryPath\Entities::replaceAllEntities($item);
      }

      $frag = $this->document->createDocumentFragment();
      try {
        set_error_handler(array('\QueryPath\ParseException', 'initializeFromError'), $this->errTypes);
        $frag->appendXML($item);
      }
      // Simulate a finally block. The class is fully qualified so that every
      // exception type restores the error handler, whatever namespace this
      // file is compiled in.
      catch (\Exception $e) {
        restore_error_handler();
        throw $e;
      }
      restore_error_handler();
      return $frag;
    }
    elseif ($item instanceof DOMQuery) {
      if ($item->size() == 0)
        return;

      // Serialize each wrapped node and re-parse it into a local fragment.
      $frag = $this->document->createDocumentFragment();
      foreach ($item->matches as $m) {
        $frag->appendXML($item->document->saveXML($m));
      }
      return $frag;
    }
    elseif ($item instanceof \DOMNode) {
      if ($item->ownerDocument !== $this->document) {
        // Deep clone this and attach it to this document
        $item = $this->document->importNode($item, TRUE);
      }
      return $item;
    }
    elseif ($item instanceof \SimpleXMLElement) {
      $element = dom_import_simplexml($item);
      return $this->document->importNode($element, TRUE);
    }
    // What should we do here?
    //var_dump($item);
    throw new \QueryPath\Exception("Cannot prepare item of unsupported type: " . gettype($item));
  }
  /**
   * The tag name of the first element in the list.
   *
   * This returns the tag name of the first element in the list of matches. If
   * the list is empty, an empty string will be used.
   *
   * @see replaceAll()
   * @see replaceWith()
   * @return string
   *   The tag name of the first element in the list.
   */
  public function tag() {
    return ($this->size() > 0) ? $this->getFirstMatch()->tagName : '';
  }
  /**
   * Remove any items from the list if they match the selector.
   *
   * In other words, each item that matches the selector will be removed
   * from the DOM document. The returned DOMQuery wraps the list of
   * removed elements.
   *
   * If no selector is specified, this will remove all current matches from
   * the document.
   *
   * @param string $selector
   *   A CSS Selector.
   * @return \QueryPath\DOMQuery
   *   The Query path wrapping a list of removed items.
   * @see replaceAll()
   * @see replaceWith()
   * @see removeChildren()
   */
  public function remove($selector = NULL) {
    if(!empty($selector)) {
      // Do a non-destructive find.
      $query = new QueryPathEventHandler($this->matches);
      $query->find($selector);
      $matches = $query->getMatches();
    }
    else {
      $matches = $this->matches;
    }

    $found = new \SplObjectStorage();
    foreach ($matches as $item) {
      // The item returned is (according to docs) different from
      // the one passed in, so we have to re-store it.
      $found->attach($item->parentNode->removeChild($item));
    }

    // Return a clone DOMQuery with just the removed items. If
    // no items are found, this will return an empty DOMQuery.
    return count($found) == 0 ? new static() : new static($found);
  }
  /**
   * This replaces everything that matches the selector with the first value
   * in the current list.
   *
   * This is the reverse of replaceWith.
   *
   * Unlike jQuery, DOMQuery cannot assume a default document. Consequently,
   * you must specify the intended destination document. If it is omitted, the
   * present document is assumed to be the document. However, that can result
   * in undefined behavior if the selector and the replacement are not sufficiently
   * distinct. (Note that the signature below declares $document as a required
   * parameter.)
   *
   * @param string $selector
   *   The selector.
   * @param DOMDocument $document
   *   The destination document.
   * @return \QueryPath\DOMQuery
   *   The DOMQuery wrapping the modified document.
   * @deprecated Due to the fact that this is not a particularly friendly method,
   *   and that it can be easily replicated using {@see replaceWith()}, it is to be
   *   considered deprecated.
   * @see remove()
   * @see replaceWith()
   */
  public function replaceAll($selector, \DOMDocument $document) {
    // With no matches, an empty text node stands in as the replacement.
    $replacement = $this->size() > 0 ? $this->getFirstMatch() : $this->document->createTextNode('');

    $c = new QueryPathEventHandler($document);
    $c->find($selector);
    $temp = $c->getMatches();
    foreach ($temp as $item) {
      // cloneNode() and importNode() are called without the deep flag here.
      $node = $replacement->cloneNode();
      $node = $document->importNode($node);
      $item->parentNode->replaceChild($node, $item);
    }
    return QueryPath::with($document, NULL, $this->options);
  }
  /**
   * Add more elements to the current set of matches.
   *
   * This begins the new query at the top of the DOM again. The results found
   * when running this selector are then merged into the existing results. In
   * this way, you can add additional elements to the existing set.
   *
   * @param string $selector
   *   A valid selector.
   * @return \QueryPath\DOMQuery
   *   The DOMQuery object with the newly added elements.
   * @see append()
   * @see after()
   * @see andSelf()
   * @see end()
   */
  public function add($selector) {

    // This is destructive, so we need to set $last. It must be a *copy*:
    // objects are assigned by handle, so a plain assignment would make $last
    // the very storage that is extended below, and the previous set would be
    // lost.
    $this->last = clone $this->matches;

    foreach (QueryPath::with($this->document, $selector, $this->options)->get() as $item) {
      $this->matches->attach($item);
    }
    return $this;
  }
  /**
   * Revert to the previous set of matches.
   *
   * <b>DEPRECATED</b> Do not use.
   *
   * This will revert back to the last set of matches (before the last
   * "destructive" set of operations). This undoes any change made to the set of
   * matched objects. Functions like find() and filter() change the
   * list of matched objects. The end() function will revert back to the last set of
   * matched items.
   *
   * Note that functions that modify the document, but do not change the list of
   * matched objects, are not "destructive". Thus, calling append('something')->end()
   * will not undo the append() call.
   *
   * Only one level of changes is stored. Reverting beyond that will result in
   * an empty set of matches.
* Example:
   *
   * @code
   * // The line below returns the same thing as qp(document, 'p');
   * qp(document, 'p')->find('div')->end();
   * // This returns an empty array:
   * qp(document, 'p')->end();
   * // This returns an empty array:
   * qp(document, 'p')->find('div')->find('span')->end()->end();
   * @endcode
   *
   * The last one returns an empty array because only one level of changes is stored.
   *
   * @return \QueryPath\DOMQuery
   *   This DOMQuery, now holding the list of matches from before the last
   *   destructive operation.
   * @see andSelf()
   * @see add()
   * @deprecated This function will be removed.
   */
  public function end() {
    // Note that this does not use setMatches because it must set the previous
    // set of matches to empty array.
    $this->matches = $this->last;
    $this->last = new \SplObjectStorage();
    return $this;
  }
  /**
   * Combine the current and previous set of matched objects.
   *
   * Example:
   *
   * @code
   * qp(document, 'p')->find('div')->andSelf();
   * @endcode
   *
   * The code above will contain a list of all p elements and all div elements that
   * are beneath p elements.
   *
   * @see end();
   * @return \QueryPath\DOMQuery
   *   This DOMQuery, holding the results of the last two "destructive" operations.
   * @see add()
   * @see end()
   */
  public function andSelf() {
    // This is destructive, so we need to set $last. Take a *copy* of the
    // current set first: objects are assigned by handle, so keeping a plain
    // reference would make $last the same storage that is merged into below.
    $previous = clone $this->matches;

    foreach ($this->last as $item) {
      $this->matches->attach($item);
    }

    $this->last = $previous;
    return $this;
  }
  /**
   * Remove all child nodes.
   *
   * This is equivalent to jQuery's empty() function. (However, empty() is a
   * PHP built-in, and cannot be used as a method name.)
   *
   * @return \QueryPath\DOMQuery
   *   The DOMQuery object with the child nodes removed.
* @see replaceWith()
   * @see replaceAll()
   * @see remove()
   */
  public function removeChildren() {
    foreach ($this->matches as $m) {
      // Keep removing the first child until none is left.
      while ($kid = $m->firstChild) {
        $m->removeChild($kid);
      }
    }
    return $this;
  }
  /**
   * Get the children of the elements in the DOMQuery object.
   *
   * If a selector is provided, the list of children will be filtered through
   * the selector.
   *
   * @param string $selector
   *   A valid selector.
   * @return \QueryPath\DOMQuery
   *   A DOMQuery wrapping all of the children.
   * @see removeChildren()
   * @see parent()
   * @see parents()
   * @see next()
   * @see prev()
   */
  public function children($selector = NULL) {
    $found = new \SplObjectStorage();

    // isset() first: passing NULL to strlen() is deprecated as of PHP 8.1.
    $filter = isset($selector) && strlen($selector) > 0;

    if ($filter) {
      // Scratch storage holding one candidate child at a time.
      $tmp = new \SplObjectStorage();
    }
    foreach ($this->matches as $m) {
      foreach ($m->childNodes as $c) {
        // Only element children are considered.
        if ($c->nodeType != XML_ELEMENT_NODE) {
          continue;
        }

        if (!$filter) {
          // No filter. Just attach it.
          $found->attach($c);
          continue;
        }

        // This is basically an optimized filter() just for children():
        // run the selector against the single candidate and keep the
        // candidate if the traverser reports any match.
        $tmp->attach($c);
        $query = new \QueryPath\CSS\DOMTraverser($tmp, TRUE, $c);
        $query->find($selector);
        if (count($query->matches()) > 0) {
          $found->attach($c);
        }
        $tmp->detach($c);
      }
    }
    $new = $this->inst($found, NULL, $this->options);
    return $new;
  }
  /**
   * Get all child nodes (not just elements) of all items in the matched set.
   *
   * It gets only the immediate children, not all nodes in the subtree.
   *
   * This does not process iframes. Xinclude processing is dependent on the
   * DOM implementation and configuration.
   *
   * @return \QueryPath\DOMQuery
   *   A DOMNode object wrapping all child nodes for all elements in the
   *   DOMNode object.
* @see find()
   * @see text()
   * @see html()
   * @see innerHTML()
   * @see xml()
   * @see innerXML()
   */
  public function contents() {
    $collected = new \SplObjectStorage();
    foreach ($this->matches as $node) {
      // Issue #51: skip nodes that expose no child list.
      if (empty($node->childNodes)) {
        continue;
      }
      // Every child node is collected, whatever its node type.
      foreach ($node->childNodes as $kid) {
        $collected->attach($kid);
      }
    }
    return $this->inst($collected, NULL, $this->options);
  }
  /**
   * Get a list of siblings for elements currently wrapped by this object.
   *
   * This will compile a list of every sibling of every element in the
   * current list of elements.
   *
   * Note that if two siblings are present in the DOMQuery object to begin with,
   * then both will be returned in the matched set, since they are siblings of each
   * other. In other words, if the matches contain a and b, and a and b are siblings of
   * each other, then running siblings will return a set that contains
   * both a and b.
   *
   * @param string $selector
   *   If the optional selector is provided, siblings will be filtered through
   *   this expression.
   * @return \QueryPath\DOMQuery
   *   The DOMQuery containing the matched siblings.
   * @see contents()
   * @see children()
   * @see parent()
   * @see parents()
   */
  public function siblings($selector = NULL) {
    $collected = new \SplObjectStorage();
    foreach ($this->matches as $node) {
      // Walk the parent's children, keeping every element except the node itself.
      foreach ($node->parentNode->childNodes as $peer) {
        if ($peer->nodeType != XML_ELEMENT_NODE) {
          continue;
        }
        if ($peer === $node) {
          continue;
        }
        $collected->attach($peer);
      }
    }

    $result = $this->inst($collected, NULL, $this->options);

    // An optional selector narrows the sibling set.
    return empty($selector) ? $result : $result->filter($selector);
  }
  /**
   * Find the closest element matching the selector.
   *
   * This finds the closest match in the ancestry chain. It first checks the
   * present element. If the present element does not match, this traverses up
   * the ancestry chain (e.g. checks each parent) looking for an item that matches.
   *
   * It is provided for jQuery 1.3 compatibility.
* @param string $selector
   *   A CSS Selector to match.
   * @return \QueryPath\DOMQuery
   *   The set of matches.
   * @since 2.0
   */
  public function closest($selector) {
    $found = new \SplObjectStorage();
    foreach ($this->matches as $m) {

      if (QueryPath::with($m, NULL, $this->options)->is($selector) > 0) {
        // The element itself matches.
        $found->attach($m);
      }
      else {
        // Climb until the document node is reached. The NULL test also stops
        // the walk for nodes that have no parent (e.g. nodes detached from
        // the tree); without it $m became NULL and the loop never ended.
        while ($m->parentNode !== NULL && $m->parentNode->nodeType !== XML_DOCUMENT_NODE) {
          $m = $m->parentNode;
          // Is there any case where parent node is not an element?
          if ($m->nodeType === XML_ELEMENT_NODE && QueryPath::with($m, NULL, $this->options)->is($selector) > 0) {
            $found->attach($m);
            break;
          }
        }
      }

    }
    // XXX: Should this be an in-place modification?
    return $this->inst($found, NULL, $this->options);
    //$this->setMatches($found);
    //return $this;
  }
  /**
   * Get the immediate parent of each element in the DOMQuery.
   *
   * If a selector is passed, this will return the nearest matching parent for
   * each element in the DOMQuery.
   *
   * @param string $selector
   *   A valid CSS3 selector.
   * @return \QueryPath\DOMQuery
   *   A DOMQuery object wrapping the matching parents.
   * @see children()
   * @see siblings()
   * @see parents()
   */
  public function parent($selector = NULL) {
    $found = new \SplObjectStorage();
    foreach ($this->matches as $m) {
      // The NULL test stops the walk for nodes that have no parent; without
      // it $m became NULL and the loop never ended.
      while ($m->parentNode !== NULL && $m->parentNode->nodeType !== XML_DOCUMENT_NODE) {
        $m = $m->parentNode;
        // Is there any case where parent node is not an element?
        if ($m->nodeType === XML_ELEMENT_NODE) {
          if (!empty($selector)) {
            // With a selector, keep climbing until an ancestor matches.
            if (QueryPath::with($m, NULL, $this->options)->is($selector) > 0) {
              $found->attach($m);
              break;
            }
          }
          else {
            // Without a selector, the first element ancestor is the answer.
            $found->attach($m);
            break;
          }
        }
      }
    }
    return $this->inst($found, NULL, $this->options);
  }
  /**
   * Get all ancestors of each element in the DOMQuery.
   *
   * If a selector is present, only matching ancestors will be retrieved.
   *
   * @see parent()
   * @param string $selector
   *   A valid CSS 3 Selector.
* @return \QueryPath\DOMQuery
   *   A DOMQuery object containing the matching ancestors.
   * @see siblings()
   * @see children()
   */
  public function parents($selector = NULL) {
    $found = new \SplObjectStorage();
    foreach ($this->matches as $m) {
      // Climb until the document node is reached. The NULL test also stops
      // the walk for nodes that have no parent (e.g. nodes detached from the
      // tree); without it $m became NULL and the loop never ended.
      while ($m->parentNode !== NULL && $m->parentNode->nodeType !== XML_DOCUMENT_NODE) {
        $m = $m->parentNode;
        // Is there any case where parent node is not an element?
        if ($m->nodeType === XML_ELEMENT_NODE) {
          if (!empty($selector)) {
            // Keep only the ancestors that satisfy the selector.
            if (QueryPath::with($m, NULL, $this->options)->is($selector) > 0) {
              $found->attach($m);
            }
          }
          else {
            $found->attach($m);
          }
        }
      }
    }
    return $this->inst($found, NULL, $this->options);
  }
  /**
   * Set or get the markup for an element.
   *
   * If $markup is set, then the giving markup will be injected into each
   * item in the set. All other children of that node will be deleted, and this
   * new code will be the only child or children. The markup MUST BE WELL FORMED.
   *
   * If no markup is given, this will return a string representing the child
   * markup of the first node.
   *
   * <b>Important:</b> This differs from jQuery's html() function. This function
   * returns <i>the current node</i> and all of its children. jQuery returns only
   * the children. This means you do not need to do things like this:
   * @code$qp->parent()->html()@endcode.
   *
   * By default, this is HTML 4.01, not XHTML. Use {@link xml()} for XHTML.
   *
   * @param string $markup
   *   The text to insert.
   * @return mixed
   *   A string if no markup was passed, or a DOMQuery if markup was passed.
* @see xml()
   * @see text()
   * @see contents()
   */
  public function html($markup = NULL) {
    if (isset($markup)) {

      if ($this->options['replace_entities']) {
        $markup = \QueryPath\Entities::replaceAllEntities($markup);
      }

      // Parse the HTML and insert it into the DOM
      //$doc = DOMDocument::loadHTML($markup);
      $doc = $this->document->createDocumentFragment();
      $doc->appendXML($markup);
      $this->removeChildren();
      $this->append($doc);
      return $this;
    }
    $length = $this->size();
    if ($length == 0) {
      return NULL;
    }
    // Only return the first item -- that's what JQ does.
    $first = $this->getFirstMatch();

    // Catch cases where first item is not a legit DOM object.
    if (!($first instanceof \DOMNode)) {
      return NULL;
    }

    // A DOMDocument has no ownerDocument (it is NULL), so resolve the owning
    // document explicitly. Reading ownerDocument unconditionally made the
    // DOMDocument branch below unreachable.
    $isDocument = $first instanceof \DOMDocument;
    $owner = $isDocument ? $first : $first->ownerDocument;

    // Added by eabrand: bail out when the document has no root element.
    if (!$owner || !$owner->documentElement) {
      return NULL;
    }

    if ($isDocument || $first->isSameNode($owner->documentElement)) {
      return $this->document->saveHTML();
    }
    // saveHTML cannot take a node and serialize it.
    return $this->document->saveXML($first);
  }

  /**
   * Write the QueryPath document to HTML5.
   *
   * Works as both setter and getter, like html(): with $markup the children
   * of every match are replaced by the parsed fragment; without it the first
   * match is serialized.
   *
   * See html()
   *
   * @param string $markup
   *   Optional markup to insert.
   * @return mixed
   *   This DOMQuery if markup was passed; otherwise the serialized markup,
   *   or NULL if there is nothing to serialize.
   */
  public function html5($markup = NULL) {
    $html5 = new HTML5($this->options);

    // append HTML to existing
    if (isset($markup)) {

      // Parse the HTML and insert it into the DOM
      $doc = $html5->loadHTMLFragment($markup);
      $this->removeChildren();
      $this->append($doc);
      return $this;
    }

    $length = $this->size();
    if ($length == 0) {
      return NULL;
    }
    // Only return the first item -- that's what JQ does.
    $first = $this->getFirstMatch();

    // Catch cases where first item is not a legit DOM object.
    if (!($first instanceof \DOMNode)) {
      return NULL;
    }

    // A DOMDocument has no ownerDocument (it is NULL), so resolve the owning
    // document explicitly before looking for the root element.
    $isDocument = $first instanceof \DOMDocument;
    $owner = $isDocument ? $first : $first->ownerDocument;

    // Added by eabrand: bail out when the document has no root element.
    if (!$owner || !$owner->documentElement) {
      return NULL;
    }

    if ($isDocument || $first->isSameNode($owner->documentElement)) {
      return $html5->saveHTML($this->document); //$this->document->saveHTML();
    }
    return $html5->saveHTML($first);
  }

  /**
   * Fetch the HTML contents INSIDE of the first DOMQuery item.
   *
   * <b>This behaves the way jQuery's @codehtml()@endcode function behaves.</b>
   *
   * This gets all children of the first match in DOMQuery.
   *
   * Consider this fragment:
   * @code
   * <div>
   * test <p>foo</p> test
   * </div>
   * @endcode
   *
   * We can retrieve just the contents of this code by doing something like
   * this:
   * @code
   * qp($xml, 'div')->innerHTML();
   * @endcode
   *
   * This would return the following:
   * @codetest <p>foo</p> test@endcode
   *
   * @return string
   *   Returns a string representation of the child nodes of the first
   *   matched element.
   * @see html()
   * @see innerXML()
   * @see innerXHTML()
   * @since 2.0
   */
  public function innerHTML() {
    // Simply delegates to innerXML().
    return $this->innerXML();
  }

  /**
   * Fetch child (inner) nodes of the first match.
   *
   * This will return the children of the present match. For an example,
   * see {@link innerHTML()}.
   *
   * @see innerHTML()
   * @see innerXML()
   * @return string
   *   Returns a string of XHTML that represents the children of the present
   *   node.
   * @since 2.0
   */
  public function innerXHTML() {
    $length = $this->size();
    if ($length == 0) {
      return NULL;
    }
    // Only return the first item -- that's what JQ does.
    $first = $this->getFirstMatch();

    // Catch cases where first item is not a legit DOM object.
    if (!($first instanceof \DOMNode)) {
      return NULL;
    }
    elseif (!$first->hasChildNodes()) {
      return '';
    }

    // LIBXML_NOEMPTYTAG makes empty elements serialize as <a></a>, not <a/>.
    $buffer = '';
    foreach ($first->childNodes as $child) {
      $buffer .= $this->document->saveXML($child, LIBXML_NOEMPTYTAG);
    }

    return $buffer;
  }

  /**
   * Fetch child (inner) nodes of the first match.
*
   * This will return the children of the present match. For an example,
   * see {@link innerHTML()}.
   *
   * @see innerHTML()
   * @see innerXHTML()
   * @return string
   *   Returns a string of XHTML that represents the children of the present
   *   node.
   * @since 2.0
   */
  public function innerXML() {
    // An empty set has nothing to serialize.
    if ($this->size() == 0) {
      return NULL;
    }

    // Only the first match is considered -- that's what JQ does.
    $head = $this->getFirstMatch();

    // Anything that is not a real DOM node cannot be serialized.
    if (!($head instanceof \DOMNode)) {
      return NULL;
    }

    if (!$head->hasChildNodes()) {
      return '';
    }

    // Serialize each child in order and glue the pieces together.
    $out = '';
    foreach ($head->childNodes as $kid) {
      $out .= $this->document->saveXML($kid);
    }
    return $out;
  }

  /**
   * Get child elements as an HTML5 string.
   *
   * TODO: This is a very simple alteration of innerXML. Do we need better
   * support?
   */
  public function innerHTML5() {
    // An empty set has nothing to serialize.
    if ($this->size() == 0) {
      return NULL;
    }

    // Only the first match is considered -- that's what JQ does.
    $head = $this->getFirstMatch();

    // Anything that is not a real DOM node cannot be serialized.
    if (!($head instanceof \DOMNode)) {
      return NULL;
    }

    if (!$head->hasChildNodes()) {
      return '';
    }

    // Serialize each child through the HTML5 writer.
    $writer = new HTML5($this->options);
    $out = '';
    foreach ($head->childNodes as $kid) {
      $out .= $writer->saveHTML($kid);
    }
    return $out;
  }

  /**
   * Retrieve the text of each match and concatenate them with the given separator.
   *
   * This has the effect of looping through all children, retrieving their text
   * content, and then concatenating the text with a separator.
   *
   * @param string $sep
   *   The string used to separate text items. The default is a comma followed by a
   *   space.
   * @param boolean $filterEmpties
   *   If this is true, empty items will be ignored.
* @return string
   *   The text contents, concatenated together with the given separator between
   *   every pair of items.
   * @see implode()
   * @see text()
   * @since 2.0
   */
  public function textImplode($sep = ', ', $filterEmpties = TRUE) {
    $pieces = array();
    foreach ($this->matches as $node) {
      $content = $node->textContent;
      // When filtering, whitespace-only content counts as empty and is
      // skipped; kept items are stored untrimmed.
      if ($filterEmpties && strlen(trim($content)) == 0) {
        continue;
      }
      $pieces[] = $content;
    }
    return implode($sep, $pieces);
  }
  /**
   * Get the text contents from just child elements.
   *
   * This is a specialized variant of textImplode() that implodes the text
   * nodes found beneath the current element(s).
   *
   * @param string $separator
   *   The separator that will be inserted between found text content.
   * @return string
   *   The concatenated values of all children.
   */
  function childrenText($separator = ' ') {
    // Work on a branch so the current match set is left untouched.
    $textNodes = $this->branch()->xpath('descendant::text()');
    return $textNodes->textImplode($separator);
  }
  /**
   * Get or set the text contents of a node.
   * @param string $text
   *   If this is not NULL, this value will be set as the text of the node. It
   *   will replace any existing content.
   * @return mixed
   *   A DOMQuery if $text is set, or the text content if no text
   *   is passed in as a param.
   * @see html()
   * @see xml()
   * @see contents()
   */
  public function text($text = NULL) {
    if (!isset($text)) {
      // Getter: the text of all matches, joined into one string.
      $joined = '';
      foreach ($this->matches as $node) {
        $joined .= $node->textContent;
      }
      return $joined;
    }

    // Setter: wipe the children, then give every match a fresh text node.
    $this->removeChildren();
    foreach ($this->matches as $node) {
      $node->appendChild($this->document->createTextNode($text));
    }
    return $this;
  }
  /**
   * Get or set the text before each selected item.
   *
   * If $text is passed in, the text is inserted before each currently selected item.
*
* If no text is given, this will return the concatenated text before each selected element.
*
* @code
* <?php
* $xml = '<?xml version="1.0"?><root>Foo<a>Bar</a><b/></root>';
*
* // This will return 'Foo'
* qp($xml, 'a')->textBefore();
*
* // This will insert 'Baz' right before <b/>.
* qp($xml, 'b')->textBefore('Baz');
* ?>
* @endcode
*
* @param string $text
*   If this is set, it will be inserted before each node in the current set of
*   selected items.
* @return mixed
*   Returns the DOMQuery object if $text was set, and returns a string (possibly empty)
*   if no param is passed.
*/
public function textBefore($text = NULL) {
  if (isset($text)) {
    $textNode = $this->document->createTextNode($text);
    return $this->before($textNode);
  }
  $buffer = '';
  foreach ($this->matches as $m) {
    $p = $m;
    // Text found for this one match, kept in document order.
    $preceding = '';
    while (isset($p->previousSibling) && $p->previousSibling->nodeType == XML_TEXT_NODE) {
      $p = $p->previousSibling;
      // The walk goes backwards through the siblings, so each node is
      // prepended. Appending would reverse adjacent text nodes (for example
      // the ones created by repeated textBefore($text) calls).
      $preceding = $p->textContent . $preceding;
    }
    $buffer .= $preceding;
  }
  return $buffer;
}

/**
 * Get or set the text after each selected item.
 *
 * If $text is passed in, a text node with that content is inserted after
 * each currently selected item.
 *
 * If no text is given, this returns the concatenated content of the run of
 * text nodes that immediately follows each selected item.
 *
 * @param string $text
 *   If this is set, it will be inserted after each node in the current set of
 *   selected items.
 * @return mixed
 *   Returns the result of after() if $text was set, and returns a string
 *   (possibly empty) if no param is passed.
 */
public function textAfter($text = NULL) {
  if (isset($text)) {
    $textNode = $this->document->createTextNode($text);
    return $this->after($textNode);
  }
  $buffer = '';
  foreach ($this->matches as $m) {
    $n = $m;
    // Walking forward visits the text nodes in document order already.
    while (isset($n->nextSibling) && $n->nextSibling->nodeType == XML_TEXT_NODE) {
      $n = $n->nextSibling;
      $buffer .= $n->textContent;
    }
  }
  return $buffer;
}

/**
 * Set or get the value of an element's 'value' attribute.
 *
 * The 'value' attribute is common in HTML form elements. This is a
 * convenience function for accessing the values. Since this is not common
 * task on the server side, this method may be removed in future releases. (It
 * is currently provided for jQuery compatibility.)
 *
 * If a value is provided in the params, then the value will be set for all
 * matches. If no params are given, then the value of the first matched element
 * will be returned. This may be NULL.
*
* @deprecated Just use attr(). There's no reason to use this on the server.
* @see attr()
* @param string $value
* @return mixed
*   Returns a DOMQuery if a string was passed in, and a string if no string
*   was passed in. In the later case, an error will produce NULL.
*/
public function val($value = NULL) {
  // Getter mode: delegate to attr().
  if (!isset($value)) {
    return $this->attr('value');
  }

  // Setter mode.
  $this->attr('value', $value);
  return $this;
}

/**
 * Set or get XHTML markup for an element or elements.
 *
 * This differs from {@link html()} in that it processes (and produces)
 * strictly XML 1.0 compliant markup.
 *
 * Like {@link xml()} and {@link html()}, this functions as both a
 * setter and a getter.
 *
 * This is a convenience function for fetching HTML in XML format.
 * It does no processing of the markup (such as schema validation).
 *
 * @param string $markup
 *   A string containing XML data. Passing TRUE selects getter mode with the
 *   XML declaration stripped from the output.
 * @return mixed
 *   If markup is passed in, a DOMQuery is returned. If no markup is passed
 *   in, XML representing the first matched element is returned.
 * @see html()
 * @see innerXHTML()
 */
public function xhtml($markup = NULL) {

  // XXX: This is a minor reworking of the original xml() method.
  // This should be refactored, probably.
  // See http://github.com/technosophos/querypath/issues#issue/10

  $skipDeclaration = $this->options['omit_xml_declaration'];
  if ($markup === TRUE) {
    // Special case: the caller wants output without the XML declaration.
    $skipDeclaration = TRUE;
  }
  elseif (isset($markup)) {
    // Setter mode is handled entirely by xml().
    return $this->xml($markup);
  }

  if ($this->size() == 0) {
    return NULL;
  }

  // Only the first item is returned -- that's what JQ does.
  $first = $this->getFirstMatch();
  if (!($first instanceof \DOMNode)) {
    // The first item is not a legit DOM object.
    return NULL;
  }

  // For the document (or its root element) the whole document is saved;
  // saving only the document element would strip the doctype.
  $wholeDocument = $first instanceof \DOMDocument
    || $first->isSameNode($first->ownerDocument->documentElement);
  if ($wholeDocument) {
    $text = $this->document->saveXML(NULL, LIBXML_NOEMPTYTAG);
  }
  else {
    $text = $this->document->saveXML($first, LIBXML_NOEMPTYTAG);
  }

  // Issue #47: Using the old trick for removing the XML tag also removed the
  // doctype. So we remove it with a regex:
  if ($skipDeclaration) {
    $text = preg_replace('/<\?xml\s[^>]*\?>/', '', $text);
  }

  // Collapse the unary elements back to self-closing form. This is slightly
  // lenient: it allows for cases where code incorrectly places content
  // inside of these supposedly unary elements.
  $unary = '/<(area|base|basefont|br|col|frame|hr|img|input|isindex|link|meta|param)(?(?=\s)([^>\/]+))><\/[^>]*>/i';
  $text = preg_replace($unary, '<\\1\\2 />', $text);

  // Experimental: Support for enclosing CDATA sections with comments to be both XML compat
  // and HTML 4/5 compat
  $cdata = '/(<!\[CDATA\[|\]\]>)/i';
  $replacement = $this->options['escape_xhtml_js_css_sections'];

  return preg_replace($cdata, $replacement, $text);
}

/**
 * Set or get the XML markup for an element or elements.
 *
 * Like {@link html()}, this functions in both a setter and a getter mode.
 *
 * In setter mode, the string passed in will be parsed and then appended to the
 * elements wrapped by this DOMNode object. When in setter mode, this parses
 * the XML using the DOMFragment parser. For that reason, an XML declaration
 * is not necessary.
 *
 * In getter mode, the first element wrapped by this DOMNode object will be
 * converted to an XML string and returned.
 *
 * @param string $markup
 *   A string containing XML data.
* @return mixed
*   If markup is passed in, a DOMQuery is returned. If no markup is passed
*   in, XML representing the first matched element is returned.
* @see xhtml()
* @see html()
* @see text()
* @see content()
* @see innerXML()
*/
public function xml($markup = NULL) {
  $omit_xml_decl = $this->options['omit_xml_declaration'];
  if ($markup === TRUE) {
    // Basically, we handle the special case where we don't
    // want the XML declaration to be displayed.
    $omit_xml_decl = TRUE;
  }
  elseif (isset($markup)) {
    // Setter mode: parse the markup as a fragment and swap it in for the
    // current children of every match.
    if ($this->options['replace_entities']) {
      $markup = \QueryPath\Entities::replaceAllEntities($markup);
    }
    $doc = $this->document->createDocumentFragment();
    $doc->appendXML($markup);
    $this->removeChildren();
    $this->append($doc);
    return $this;
  }

  $length = $this->size();
  if ($length == 0) {
    return NULL;
  }
  // Only return the first item -- that's what JQ does.
  $first = $this->getFirstMatch();

  // Catch cases where first item is not a legit DOM object.
  if (!($first instanceof \DOMNode)) {
    return NULL;
  }

  // A DOMDocument has no ownerDocument (it is NULL), so its root element has
  // to be read from the document itself. Going through ownerDocument here
  // used to read a property on NULL and then emit the whole document,
  // declaration included, even when the declaration was to be omitted.
  $isDocument = $first instanceof \DOMDocument;
  $root = $isDocument ? $first->documentElement : $first->ownerDocument->documentElement;

  if ($isDocument || $first->isSameNode($root)) {
    // Saving just the root element is what drops the XML declaration.
    return ($omit_xml_decl ? $this->document->saveXML($root) : $this->document->saveXML());
  }
  return $this->document->saveXML($first);
}

/**
 * Send the XML document to the client.
 *
 * Write the document to a file path, if given, or
 * to stdout (usually the client).
 *
 * This prints the entire document.
 *
 * @param string $path
 *   The path to the file into which the XML should be written. if
 *   this is NULL, data will be written to STDOUT, which is usually
 *   sent to the remote browser.
 * @param int $options
 *   (As of QueryPath 2.1) Pass libxml options to the saving mechanism.
 * @return \QueryPath\DOMQuery
 *   The DOMQuery object, unmodified.
* @see xml()
* @see innerXML()
* @see writeXHTML()
* @throws Exception
*   In the event that a file cannot be written, an Exception will be thrown.
*/
public function writeXML($path = NULL, $options = NULL) {
  // The libxml option arguments of saveXML()/save() are ints. Passing the
  // NULL default straight through is deprecated as of PHP 8.1, and in the
  // file branch below every PHP error is routed to the IOException handler
  // (presumably raising an exception -- TODO confirm), so normalize first.
  $options = (int) $options;

  if ($path == NULL) {
    print $this->document->saveXML(NULL, $options);
  }
  else {
    try {
      // Turn PHP errors raised while saving into exceptions.
      set_error_handler(array('\QueryPath\IOException', 'initializeFromError'));
      $this->document->save($path, $options);
    }
    // Emulate 'finally' behavior: the handler is restored on both paths.
    catch (Exception $e) {
      restore_error_handler();
      throw $e;
    }
    restore_error_handler();
  }
  return $this;
}

/**
 * Writes HTML to output.
 *
 * HTML is formatted as HTML 4.01, without strict XML unary tags. This is for
 * legacy HTML content. Modern XHTML should be written using {@link toXHTML()}.
 *
 * Write the document to stdout (usually the client) or to a file.
 *
 * @param string $path
 *   The path to the file into which the XML should be written. if
 *   this is NULL, data will be written to STDOUT, which is usually
 *   sent to the remote browser.
 * @return \QueryPath\DOMQuery
 *   The DOMQuery object, unmodified.
 * @see html()
 * @see innerHTML()
 * @throws Exception
 *   In the event that a file cannot be written, an Exception will be thrown.
 */
public function writeHTML($path = NULL) {
  if ($path == NULL) {
    print $this->document->saveHTML();
  }
  else {
    try {
      // Turn PHP errors raised while saving into exceptions.
      set_error_handler(array('\QueryPath\ParseException', 'initializeFromError'));
      $this->document->saveHTMLFile($path);
    }
    // Emulate 'finally' behavior: the handler is restored on both paths.
    catch (Exception $e) {
      restore_error_handler();
      throw $e;
    }
    restore_error_handler();
  }
  return $this;
}

/**
 * Write the document to HTML5.
 *
 * This works the same as the other write* functions, but it encodes the output
 * as HTML5 with UTF-8.
 * @see html5()
 * @see innerHTML5()
 * @throws Exception
 *   In the event that a file cannot be written, an Exception will be thrown.
*/
public function writeHTML5($path = NULL) {
  $html5 = new HTML5();
  if ($path == NULL) {
    // Print the document to stdout.
    print $html5->saveHTML($this->document);
  }
  else {
    $html5->save($this->document, $path);
  }

  // Return the DOMQuery, as writeXML(), writeHTML() and writeXHTML() do, so
  // that the call can be chained. (This method used to return nothing.)
  return $this;
}

/**
 * Write an XHTML file to output.
 *
 * Typically, you should use this instead of {@link writeHTML()}.
 *
 * Currently, this functions identically to {@link toXML()} <i>except that</i>
 * it always uses closing tags (e.g. always @code<script></script>@endcode,
 * never @code<script/>@endcode). It will
 * write the file as well-formed XML. No XHTML schema validation is done.
 *
 * @see writeXML()
 * @see xml()
 * @see writeHTML()
 * @see innerXHTML()
 * @see xhtml()
 * @param string $path
 *   The filename of the file to write to.
 * @return \QueryPath\DOMQuery
 *   Returns the DOMQuery, unmodified.
 * @throws Exception
 *   In the event that the output file cannot be written, an exception is
 *   thrown.
 * @since 2.0
 */
public function writeXHTML($path = NULL) {
  // LIBXML_NOEMPTYTAG is what forces the explicit closing tags.
  return $this->writeXML($path, LIBXML_NOEMPTYTAG);
}

/**
 * Get the next sibling of each element in the DOMQuery.
 *
 * If a selector is provided, the next matching sibling will be returned.
 *
 * @param string $selector
 *   A CSS3 selector.
 * @return \QueryPath\DOMQuery
 *   The DOMQuery object.
 * @see nextAll()
 * @see prev()
 * @see children()
 * @see contents()
 * @see parent()
 * @see parents()
 */
public function next($selector = NULL) {
  $found = new \SplObjectStorage();
  foreach ($this->matches as $m) {
    while (isset($m->nextSibling)) {
      $m = $m->nextSibling;
      // Only element siblings are candidates; text, comments, etc. are skipped.
      if ($m->nodeType !== XML_ELEMENT_NODE) {
        continue;
      }
      // Without a selector the first element sibling wins; with one, keep
      // walking until a sibling matches.
      if (empty($selector) || QueryPath::with($m, NULL, $this->options)->is($selector) > 0) {
        $found->attach($m);
        break;
      }
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get all siblings after an element.
*
* For each element in the DOMQuery, get all siblings that appear after
* it. If a selector is passed in, then only siblings that match the
* selector will be included.
*
* @param string $selector
*   A valid CSS 3 selector.
* @return \QueryPath\DOMQuery
*   The DOMQuery object, now containing the matching siblings.
* @see next()
* @see prevAll()
* @see children()
* @see siblings()
*/
public function nextAll($selector = NULL) {
  $found = new \SplObjectStorage();
  $filtered = !empty($selector);
  foreach ($this->matches as $node) {
    while (isset($node->nextSibling)) {
      $node = $node->nextSibling;
      // Non-element siblings (text, comments, ...) are never collected.
      if ($node->nodeType !== XML_ELEMENT_NODE) {
        continue;
      }
      if (!$filtered || QueryPath::with($node, NULL, $this->options)->is($selector) > 0) {
        $found->attach($node);
      }
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get the closest preceding element sibling of each element in the DOMQuery.
 *
 * For each element in the DOMQuery, this retrieves the previous element
 * sibling (if any). If a selector is supplied, it retrieves the closest
 * preceding element sibling that matches (if any is found).
 *
 * @param string $selector
 *   A valid CSS 3 selector.
 * @return \QueryPath\DOMQuery
 *   A DOMQuery object, now containing any previous siblings that have been
 *   found.
 * @see prevAll()
 * @see next()
 * @see siblings()
 * @see children()
 */
public function prev($selector = NULL) {
  $found = new \SplObjectStorage();
  $filtered = !empty($selector);
  foreach ($this->matches as $node) {
    while (isset($node->previousSibling)) {
      $node = $node->previousSibling;
      // Non-element siblings (text, comments, ...) are never collected.
      if ($node->nodeType !== XML_ELEMENT_NODE) {
        continue;
      }
      // Stop at the first acceptable sibling for this match.
      if (!$filtered || QueryPath::with($node, NULL, $this->options)->is($selector)) {
        $found->attach($node);
        break;
      }
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get the previous siblings for each element in the DOMQuery.
 *
 * For each element in the DOMQuery, get all previous siblings.
* If a selector is provided, only matching siblings will be retrieved.
*
* @param string $selector
*   A valid CSS 3 selector.
* @return \QueryPath\DOMQuery
*   The DOMQuery object, now wrapping previous sibling elements.
* @see prev()
* @see nextAll()
* @see siblings()
* @see contents()
* @see children()
*/
public function prevAll($selector = NULL) {
  $found = new \SplObjectStorage();
  $filtered = !empty($selector);
  foreach ($this->matches as $node) {
    while (isset($node->previousSibling)) {
      $node = $node->previousSibling;
      // Non-element siblings (text, comments, ...) are never collected.
      if ($node->nodeType !== XML_ELEMENT_NODE) {
        continue;
      }
      if (!$filtered || QueryPath::with($node, NULL, $this->options)->is($selector)) {
        $found->attach($node);
      }
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Add a class to all elements in the current DOMQuery.
 *
 * Each wrapped item is checked for a class attribute. If there is none, a
 * new one is created with $class as its value. If one exists, a space and
 * $class are appended to its current value.
 *
 * @param string $class
 *   The name of the class.
 * @return \QueryPath\DOMQuery
 *   Returns the DOMQuery object.
 * @see css()
 * @see attr()
 * @see removeClass()
 * @see hasClass()
 */
public function addClass($class) {
  foreach ($this->matches as $element) {
    $value = $element->hasAttribute('class')
      ? $element->getAttribute('class') . ' ' . $class
      : $class;
    $element->setAttribute('class', $value);
  }
  return $this;
}

/**
 * Remove the named class from any element in the DOMQuery that has it.
 *
 * This may result in the entire class attribute being removed. If there
 * are other items in the class attribute, though, they will not be removed.
*
* Example:
* Consider this XML:
* @code
* <element class="first second"/>
* @endcode
*
* Executing this fragment of code will remove only the 'first' class:
* @code
* qp(document, 'element')->removeClass('first');
* @endcode
*
* The resulting XML will be:
* @code
* <element class="second"/>
* @endcode
*
* To remove the entire 'class' attribute, you should use {@see removeAttr()}.
*
* @param string $class
*   The class name to remove.
* @return \QueryPath\DOMQuery
*   The modified DOMNode object.
* @see attr()
* @see addClass()
* @see hasClass()
*/
public function removeClass($class = false) {
  // No class given: drop the whole attribute from every match.
  if (empty($class)) {
    foreach ($this->matches as $element) {
      $element->removeAttribute('class');
    }
    return $this;
  }

  // $class may hold several space-separated names.
  $unwanted = array_filter(explode(' ', $class));
  foreach ($this->matches as $element) {
    if (!$element->hasAttribute('class')) {
      continue;
    }

    $kept = array();
    foreach (array_filter(explode(' ', $element->getAttribute('class'))) as $name) {
      if (!in_array($name, $unwanted)) {
        $kept[] = $name;
      }
    }

    // An attribute left without any names is removed entirely.
    if (empty($kept)) {
      $element->removeAttribute('class');
    }
    else {
      $element->setAttribute('class', implode(' ', $kept));
    }
  }
  return $this;
}

/**
 * Check whether any element in the DOMQuery carries the given class.
 *
 * @param string $class
 *   The name of the class.
 * @return boolean
 *   TRUE if the class exists in one or more of the elements, FALSE otherwise.
 * @see addClass()
 * @see removeClass()
 */
public function hasClass($class) {
  foreach ($this->matches as $element) {
    if (!$element->hasAttribute('class')) {
      continue;
    }
    $names = explode(' ', $element->getAttribute('class'));
    if (in_array($class, $names)) {
      return TRUE;
    }
  }
  return FALSE;
}

/**
 * Branch the base DOMQuery into another one with the same matches.
 *
 * This function makes a copy of the DOMQuery object, but keeps the new copy
 * (initially) pointed at the same matches. This object can then be queried without
 * changing the original DOMQuery.
* However, changes to the elements inside of this DOMQuery will show up in the
* DOMQuery from which it is branched.
*
* Compare this operation with {@link cloneAll()}. The cloneAll() call takes
* the current DOMNode object and makes a copy of all of its matches. You continue
* to operate on the same DOMNode object, but the elements inside of the DOMQuery
* are copies of those before the call to cloneAll().
*
* This, on the other hand, copies <i>the DOMQuery</i>, but keeps valid
* references to the document and the wrapped elements. A new query branch is
* created, but any changes will be written back to the same document.
*
* In practice, this comes in handy when you want to do multiple queries on a part
* of the document, but then return to a previous set of matches. (see {@link QPTPL}
* for examples of this in practice).
*
* Example:
*
* @code
* <?php
* $qp = qp( QueryPath::HTML_STUB);
* $branch = $qp->branch();
* $branch->find('title')->text('Title');
* $qp->find('body')->text('This is the body')->writeHTML();
* ?>
* @endcode
*
* Notice that in the code, each of the DOMQuery objects is doing its own
* query. However, both are modifying the same document. The result of the above
* would look something like this:
*
* @code
* <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
* <html xmlns="http://www.w3.org/1999/xhtml">
* <head>
*    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
*    <title>Title</title>
* </head>
* <body>This is the body</body>
* </html>
* @endcode
*
* Notice that while $qp and $branch were performing separate queries, they
* both modified the same document.
*
* In jQuery or a browser-based solution, you generally do not need a branching
* function because there is (implicitly) only one document. In QueryPath, there
* is no implicit document. Every document must be explicitly specified (and,
* in most cases, parsed -- which is costly). Branching makes it possible to
* work on one document with multiple DOMNode objects.
*
* @param string $selector
*   If a selector is passed in, an additional {@link find()} will be executed
*   on the branch before it is returned. (Added in QueryPath 2.0.)
* @return \QueryPath\DOMQuery
*   A copy of the DOMQuery object that points to the same set of elements that
*   the original DOMQuery was pointing to.
* @since 1.1
* @see cloneAll()
* @see find()
*/
public function branch($selector = NULL) {
  $copy = \QueryPath::with($this->matches, NULL, $this->options);
  // The branch keeps working on the very same document.
  $copy->document = $this->document;
  if (isset($selector)) {
    $copy->findInPlace($selector);
  }
  return $copy;
}

/**
 * Create a new DOMQuery for the given matches, based on this one.
 *
 * The current object is cloned and the clone's matches are replaced with
 * $matches. If a selector is given, it is then applied in place on the clone.
 *
 * NOTE(review): `clone` runs __clone(), which calls cloneAll() on the
 * current matches before they are replaced here.
 *
 * @param mixed $matches
 *   The matches the new object should wrap; passed to setMatches().
 * @param string $selector
 *   An optional selector to run on the new object.
 * @param array $options
 *   Not used by this implementation.
 * @return \QueryPath\DOMQuery
 *   The new DOMQuery object.
 */
protected function inst($matches, $selector, $options) {
  // https://en.wikipedia.org/wiki/Dolly_(sheep)
  $dolly = clone $this;
  $dolly->setMatches($matches);
  if (isset($selector)) {
    $dolly->findInPlace($selector);
  }
  return $dolly;
}

/**
 * Perform a deep clone of each node in the DOMQuery.
 *
 * @attention
 *   This is an in-place modification of the current QueryPath object.
 *
 * This does not clone the DOMQuery object, but instead clones the
 * list of nodes wrapped by the DOMQuery. Every element is deeply
 * cloned.
 *
 * This method is analogous to jQuery's clone() method.
 *
 * This is a destructive operation, which means that end() will revert
 * the list back to the clone's original.
* @see qp()
* @return \QueryPath\DOMQuery
*/
public function cloneAll() {
  $found = new \SplObjectStorage();
  foreach ($this->matches as $m) {
    // TRUE requests a deep copy: the node together with its whole subtree.
    $found->attach($m->cloneNode(TRUE));
  }
  $this->setMatches($found);
  return $this;
}

/**
 * Clone the DOMQuery.
 *
 * This makes a deep clone of the elements inside of the DOMQuery.
 *
 * This clones only the QueryPathImpl, not all of the decorators. The
 * clone operator in PHP should handle the cloning of the decorators.
 */
public function __clone() {
  //XXX: Should we clone the document?

  // Make sure we clone the kids.
  $this->cloneAll();
}

/**
 * Detach any items from the list if they match the selector.
 *
 * In other words, each item that matches the selector will be removed
 * from the DOM document. The returned DOMQuery wraps the list of
 * removed elements.
 *
 * If no selector is specified, this will remove all current matches from
 * the document.
 *
 * Items that have no parent node (for example nodes that were already
 * detached) cannot be removed from anything; they are passed through to the
 * returned list unchanged.
 *
 * @param string $selector
 *   A CSS Selector.
 * @return \QueryPath\DOMQuery
 *   The Query path wrapping a list of removed items.
 * @see replaceAll()
 * @see replaceWith()
 * @see removeChildren()
 * @since 2.1
 * @author eabrand
 */
public function detach($selector = NULL) {

  // NOTE(review): the return value of find() is discarded here. If find()
  // does not change the current matches in place, the selector has no
  // effect -- confirm (branch() uses findInPlace() for this purpose).
  if (!empty($selector)) {
    $this->find($selector);
  }

  $found = new \SplObjectStorage();
  // Remember what was detached so that attach() can put it back somewhere.
  $this->last = $this->matches;
  foreach ($this->matches as $item) {
    if (isset($item->parentNode)) {
      // The item returned is (according to docs) different from
      // the one passed in, so we have to re-store it.
      $found->attach($item->parentNode->removeChild($item));
    }
    else {
      // No parent to remove it from. Calling removeChild() on the missing
      // parent would be a fatal error, so the node is kept as it is.
      $found->attach($item);
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Append the items in the "last matches" buffer to a destination.
 *
 * Every node held in the last-matches buffer (which detach() fills with the
 * nodes it removed) is appended to $dest.
 *
 * @param DOMQuery $dest
 *   The DOMQuery to which the nodes are appended.
 * @return \QueryPath\DOMQuery
 *   The current DOMQuery object.
* @see replaceAll()
* @see replaceWith()
* @see removeChildren()
* @since 2.1
* @author eabrand
*/
public function attach(DOMQuery $dest) {
  foreach ($this->last as $node) {
    $dest->append($node);
  }
  return $this;
}

/**
 * Reduce the elements matched by DOMQuery to only those which contain the given item.
 *
 * There are two ways in which this is different from jQuery's implementation:
 * - We allow ANY DOMNode, not just DOMElements. That means this will work on
 *   processor instructions, text nodes, comments, etc.
 * - The result is returned as a DOMQuery produced by inst(), wrapping the
 *   matches that contain the item.
 *
 * @param mixed $contained
 *   - If $contained is a CSS selector (e.g. '#foo'), this will test to see
 *     if the current DOMQuery has any elements that contain items that match
 *     the selector.
 *   - If $contained is a DOMNode, then this will test to see if THE EXACT DOMNode
 *     exists in the currently matched elements. (Note that you cannot match across DOM trees, even if it is the same document.)
 * @return \QueryPath\DOMQuery
 *   A DOMQuery wrapping the current matches that are (or contain) one of the
 *   tested nodes.
 * @since 2.1
 * @author eabrand
 * @todo It would be trivially easy to add support for iterating over an array or Iterable of DOMNodes.
 */
public function has($contained) {
  $found = new \SplObjectStorage();

  // Build the list of nodes to test: everything a selector matches, or just
  // the one node that was handed in. Any other input tests nothing.
  $candidates = array();
  if (is_string($contained)) {
    $candidates = $this->branch($contained)->get();
  }
  elseif ($contained instanceof \DOMNode) {
    $candidates = array($contained);
  }

  // Walk from each candidate up through its ancestors. Every node on that
  // path that is one of our current matches goes into the result.
  foreach ($candidates as $candidate) {
    for ($node = $candidate; !empty($node); $node = $node->parentNode) {
      if ($this->matches->contains($node)) {
        $found->attach($node);
      }
    }
  }

  return $this->inst($found, NULL, $this->options);
}

/**
 * Empty everything within the specified element.
 *
 * A convenience function for removeChildren(). This is equivalent to jQuery's
 * empty() function. However, `empty` is a built-in in PHP, and cannot be used as a
 * function name.
 *
 * @return \QueryPath\DOMQuery
 *   The DOMQuery object with the newly emptied elements.
 * @see removeChildren()
 * @since 2.1
 * @author eabrand
 * @deprecated The removeChildren() function is the preferred method.
 */
public function emptyElement() {
  $this->removeChildren();
  return $this;
}

/**
 * Get the even elements, so counter-intuitively 1, 3, 5, etc.
 *
 * Positions are counted over all matches, starting at zero; only matches
 * that are element nodes are kept.
 *
 * @return \QueryPath\DOMQuery
 *   A DOMQuery wrapping the matches at positions 1, 3, 5, etc.
 * @see removeChildren()
 * @see parent()
 * @see parents()
 * @see next()
 * @see prev()
 * @since 2.1
 * @author eabrand
 */
public function even() {
  $found = new \SplObjectStorage();
  $take = false;
  foreach ($this->matches as $node) {
    if ($take && $node->nodeType == XML_ELEMENT_NODE) {
      $found->attach($node);
    }
    // Flip on every match, element or not.
    $take = !$take;
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get the odd elements, so counter-intuitively 0, 2, 4, etc.
 *
 * Positions are counted over all matches, starting at zero; only matches
 * that are element nodes are kept.
 *
 * @return \QueryPath\DOMQuery
 *   A DOMQuery wrapping the matches at positions 0, 2, 4, etc.
 * @see removeChildren()
 * @see parent()
 * @see parents()
 * @see next()
 * @see prev()
 * @since 2.1
 * @author eabrand
 */
public function odd() {
  $found = new \SplObjectStorage();
  $take = true;
  foreach ($this->matches as $node) {
    if ($take && $node->nodeType == XML_ELEMENT_NODE) {
      $found->attach($node);
    }
    // Flip on every match, element or not.
    $take = !$take;
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get the first matching element.
 *
 * @return \QueryPath\DOMQuery
 *   A DOMQuery wrapping the first match that is an element node, or nothing
 *   if there is no such match.
 * @see next()
 * @see prev()
 * @since 2.1
 * @author eabrand
 */
public function first() {
  $found = new \SplObjectStorage();
  foreach ($this->matches as $node) {
    if ($node->nodeType != XML_ELEMENT_NODE) {
      continue;
    }
    $found->attach($node);
    break;
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get the first child of the matching element.
 *
 * Only one node is returned: the first element child of the first match
 * that has an element child.
 *
 * @return \QueryPath\DOMQuery
 *   A DOMQuery wrapping that child, or nothing if no match has an element
 *   child.
 * @see next()
 * @see prev()
 * @since 2.1
 * @author eabrand
 */
public function firstChild() {
  // Could possibly use $m->firstChild http://theserverpages.com/php/manual/en/ref.dom.php
  $found = new \SplObjectStorage();
  foreach ($this->matches as $parent) {
    foreach ($parent->childNodes as $child) {
      if ($child->nodeType == XML_ELEMENT_NODE) {
        $found->attach($child);
        // One child is all we want: leave both loops.
        break 2;
      }
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get the last matching element.
 *
 * @return \QueryPath\DOMQuery
 *   A DOMQuery wrapping the last match that is an element node, or nothing
 *   if there is no such match.
 * @see next()
 * @see prev()
 * @since 2.1
 * @author eabrand
 */
public function last() {
  $found = new \SplObjectStorage();
  $lastElement = null;
  foreach ($this->matches as $node) {
    if ($node->nodeType == XML_ELEMENT_NODE) {
      $lastElement = $node;
    }
  }
  if ($lastElement) {
    $found->attach($lastElement);
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get the last child of the matching element.
 *
 *
 * @return \QueryPath\DOMQuery
 *   A DOMQuery wrapping all of the children.
* @see next()
* @see prev()
* @since 2.1
* @author eabrand
*/
public function lastChild() {
  $found = new \SplObjectStorage();
  foreach ($this->matches as $parent) {
    // Track the most recent element child seen for this match.
    $lastElement = null;
    foreach ($parent->childNodes as $child) {
      if ($child->nodeType == XML_ELEMENT_NODE) {
        $lastElement = $child;
      }
    }
    if ($lastElement) {
      $found->attach($lastElement);
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get all siblings after an element until the selector is reached.
 *
 * For each element in the DOMQuery, collect the element siblings that appear
 * after it. If a selector is passed in, collecting stops (for that element)
 * at the first sibling that matches the selector; the matching sibling itself
 * is not included.
 *
 * @param string $selector
 *   A valid CSS 3 selector.
 * @return \QueryPath\DOMQuery
 *   The DOMQuery object, now containing the collected siblings.
 * @see next()
 * @see prevAll()
 * @see children()
 * @see siblings()
 * @since 2.1
 * @author eabrand
 */
public function nextUntil($selector = NULL) {
  $found = new \SplObjectStorage();
  $bounded = !empty($selector);
  foreach ($this->matches as $node) {
    while (isset($node->nextSibling)) {
      $node = $node->nextSibling;
      // Non-element siblings (text, comments, ...) are never collected.
      if ($node->nodeType !== XML_ELEMENT_NODE) {
        continue;
      }
      // Reaching the boundary ends the walk for this match.
      if ($bounded && QueryPath::with($node, NULL, $this->options)->is($selector) > 0) {
        break;
      }
      $found->attach($node);
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get the previous siblings for each element in the DOMQuery
 * until the selector is reached.
 *
 * For each element in the DOMQuery, get all previous siblings. If a
 * selector is provided, only matching siblings will be retrieved.
 *
 * @param string $selector
 *   A valid CSS 3 selector.
 * @return \QueryPath\DOMQuery
 *   The DOMQuery object, now wrapping previous sibling elements.
* @see prev()
* @see nextAll()
* @see siblings()
* @see contents()
* @see children()
* @since 2.1
* @author eabrand
*/
public function prevUntil($selector = NULL) {
  $found = new \SplObjectStorage();
  foreach ($this->matches as $m) {
    while (isset($m->previousSibling)) {
      $m = $m->previousSibling;
      if ($m->nodeType === XML_ELEMENT_NODE) {
        // A sibling that matches the selector ends the walk for this match
        // and is not itself collected.
        if (!empty($selector) && QueryPath::with($m, NULL, $this->options)->is($selector)) {
          break;
        }
        else {
          $found->attach($m);
        }
      }
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/**
 * Get all ancestors of each element in the DOMQuery until the selector is reached.
 *
 * For each element, the ancestors are collected from the parent upwards. The
 * walk stops below the document node, or where there is no further parent
 * (as with a node that is not attached to a document). If a selector is
 * given, the walk for that element also stops at the first ancestor that
 * matches it; the matching ancestor itself is not included.
 *
 * @see parent()
 * @param string $selector
 *   A valid CSS 3 Selector.
 * @return \QueryPath\DOMQuery
 *   A DOMQuery object containing the collected ancestors.
 * @see siblings()
 * @see children()
 * @since 2.1
 * @author eabrand
 */
public function parentsUntil($selector = NULL) {
  $found = new \SplObjectStorage();
  foreach ($this->matches as $m) {
    // The isset() guard matters: a chain of parents that ends in NULL rather
    // than in a document node (a detached node, a node inside a fragment)
    // would otherwise never satisfy the exit condition and loop forever.
    while (isset($m->parentNode) && $m->parentNode->nodeType !== XML_DOCUMENT_NODE) {
      $m = $m->parentNode;
      // Is there any case where parent node is not an element?
      if ($m->nodeType === XML_ELEMENT_NODE) {
        if (!empty($selector)) {
          if (QueryPath::with($m, NULL, $this->options)->is($selector) > 0) {
            break;
          }
          else {
            $found->attach($m);
          }
        }
        else {
          $found->attach($m);
        }
      }
    }
  }
  return $this->inst($found, NULL, $this->options);
}

/////// INTERNAL FUNCTIONS ////////


/**
 * Determine whether a given string looks like XML or not.
 *
 * Basically, this scans a portion of the supplied string, checking to see
 * if it has a tag-like structure. It is possible to "confuse" this, which
 * may subsequently result in parse errors, but in the vast majority of
 * cases, this method serves as a valid indicator of whether or not the
 * content looks like XML.
+ * + * Things that are intentional excluded: + * - plain text with no markup. + * - strings that look like filesystem paths. + * + * Subclasses SHOULD NOT OVERRIDE THIS. Altering it may be altering + * core assumptions about how things work. Instead, classes should + * override the constructor and pass in only one of the parsed types + * that this class expects. + */ + protected function isXMLish($string) { + return (strpos($string, '<') !== FALSE && strpos($string, '>') !== FALSE); + } + + private function parseXMLString($string, $flags = NULL) { + + $document = new \DOMDocument('1.0'); + $lead = strtolower(substr($string, 0, 5)); // <?xml + try { + set_error_handler(array('\QueryPath\ParseException', 'initializeFromError'), $this->errTypes); + + if (isset($this->options['convert_to_encoding'])) { + // Is there another way to do this? + + $from_enc = isset($this->options['convert_from_encoding']) ? $this->options['convert_from_encoding'] : 'auto'; + $to_enc = $this->options['convert_to_encoding']; + + if (function_exists('mb_convert_encoding')) { + $string = mb_convert_encoding($string, $to_enc, $from_enc); + } + + } + + // This is to avoid cases where low ascii digits have slipped into HTML. + // AFAIK, it should not adversly effect UTF-8 documents. + if (!empty($this->options['strip_low_ascii'])) { + $string = filter_var($string, FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_LOW); + } + + // Allow users to override parser settings. + if (empty($this->options['use_parser'])) { + $useParser = ''; + } + else { + $useParser = strtolower($this->options['use_parser']); + } + + // If HTML parser is requested, we use it. + if ($useParser == 'html') { + $document->loadHTML($string); + } + // Parse as XML if it looks like XML, or if XML parser is requested. 
+ elseif ($lead == '<?xml' || $useParser == 'xml') { + if ($this->options['replace_entities']) { + $string = \QueryPath\Entities::replaceAllEntities($string); + } + $document->loadXML($string, $flags); + } + // In all other cases, we try the HTML parser. + else { + $document->loadHTML($string); + } + } + // Emulate 'finally' behavior. + catch (Exception $e) { + restore_error_handler(); + throw $e; + } + restore_error_handler(); + + if (empty($document)) { + throw new \QueryPath\ParseException('Unknown parser exception.'); + } + return $document; + } + + /** + * EXPERT: Be very, very careful using this. + * A utility function for setting the current set of matches. + * It makes sure the last matches buffer is set (for end() and andSelf()). + * @since 2.0 + */ + public function setMatches($matches, $unique = TRUE) { + // This causes a lot of overhead.... + //if ($unique) $matches = self::unique($matches); + $this->last = $this->matches; + + // Just set current matches. + if ($matches instanceof \SplObjectStorage) { + $this->matches = $matches; + } + // This is likely legacy code that needs conversion. + elseif (is_array($matches)) { + trigger_error('Legacy array detected.'); + $tmp = new \SplObjectStorage(); + foreach ($matches as $m) $tmp->attach($m); + $this->matches = $tmp; + } + // For non-arrays, try to create a new match set and + // add this object. + else { + $found = new \SplObjectStorage(); + if (isset($matches)) $found->attach($matches); + $this->matches = $found; + } + + // EXPERIMENTAL: Support for qp()->length. + $this->length = $this->matches->count(); + } + + /** + * Set the match monitor to empty. + * + * This preserves history. + * + * @since 2.0 + */ + private function noMatches() { + $this->setMatches(NULL); + } + + /** + * A utility function for retriving a match by index. + * + * The internal data structure used in DOMQuery does not have + * strong random access support, so we suppliment it with this method. 
+ */ + private function getNthMatch($index) { + if ($index > $this->matches->count() || $index < 0) return; + + $i = 0; + foreach ($this->matches as $m) { + if ($i++ == $index) return $m; + } + } + + /** + * Convenience function for getNthMatch(0). + */ + private function getFirstMatch() { + $this->matches->rewind(); + return $this->matches->current(); + } + + /** + * Parse an XML or HTML file. + * + * This attempts to autodetect the type of file, and then parse it. + * + * @param string $filename + * The file name to parse. + * @param int $flags + * The OR-combined flags accepted by the DOM parser. See the PHP documentation + * for DOM or for libxml. + * @param resource $context + * The stream context for the file IO. If this is set, then an alternate + * parsing path is followed: The file is loaded by PHP's stream-aware IO + * facilities, read entirely into memory, and then handed off to + * {@link parseXMLString()}. On large files, this can have a performance impact. + * @throws \QueryPath\ParseException + * Thrown when a file cannot be loaded or parsed. 
+ */ + private function parseXMLFile($filename, $flags = NULL, $context = NULL) { + + // If a context is specified, we basically have to do the reading in + // two steps: + if (!empty($context)) { + try { + set_error_handler(array('\QueryPath\ParseException', 'initializeFromError'), $this->errTypes); + $contents = file_get_contents($filename, FALSE, $context); + } + // Apparently there is no 'finally' in PHP, so we have to restore the error + // handler this way: + catch(Exception $e) { + restore_error_handler(); + throw $e; + } + restore_error_handler(); + + if ($contents == FALSE) { + throw new \QueryPath\ParseException(sprintf('Contents of the file %s could not be retrieved.', $filename)); + } + return $this->parseXMLString($contents, $flags); + } + + $document = new \DOMDocument(); + $lastDot = strrpos($filename, '.'); + + $htmlExtensions = array( + '.html' => 1, + '.htm' => 1, + ); + + // Allow users to override parser settings. + if (empty($this->options['use_parser'])) { + $useParser = ''; + } + else { + $useParser = strtolower($this->options['use_parser']); + } + + $ext = $lastDot !== FALSE ? strtolower(substr($filename, $lastDot)) : ''; + + try { + set_error_handler(array('\QueryPath\ParseException', 'initializeFromError'), $this->errTypes); + + // If the parser is explicitly set to XML, use that parser. + if ($useParser == 'xml') { + $r = $document->load($filename, $flags); + } + // Otherwise, see if it looks like HTML. + elseif (isset($htmlExtensions[$ext]) || $useParser == 'html') { + // Try parsing it as HTML. + $r = $document->loadHTMLFile($filename); + } + // Default to XML. + else { + $r = $document->load($filename, $flags); + } + + } + // Emulate 'finally' behavior. + catch (Exception $e) { + restore_error_handler(); + throw $e; + } + restore_error_handler(); + return $document; + } + + /** + * Call extension methods. + * + * This function is used to invoke extension methods. 
It searches the + * registered extenstensions for a matching function name. If one is found, + * it is executed with the arguments in the $arguments array. + * + * @throws QueryPath::Exception + * An exception is thrown if a non-existent method is called. + */ + public function __call($name, $arguments) { + + if (!ExtensionRegistry::$useRegistry) { + throw new \QueryPath\Exception("No method named $name found (Extensions disabled)."); + } + + // Loading of extensions is deferred until the first time a + // non-core method is called. This makes constructing faster, but it + // may make the first invocation of __call() slower (if there are + // enough extensions.) + // + // The main reason for moving this out of the constructor is that most + // new DOMQuery instances do not use extensions. Charging qp() calls + // with the additional hit is not a good idea. + // + // Also, this will at least limit the number of circular references. + if (empty($this->ext)) { + // Load the registry + $this->ext = ExtensionRegistry::getExtensions($this); + } + + // Note that an empty ext registry indicates that extensions are disabled. + if (!empty($this->ext) && ExtensionRegistry::hasMethod($name)) { + $owner = ExtensionRegistry::getMethodClass($name); + $method = new \ReflectionMethod($owner, $name); + return $method->invokeArgs($this->ext[$owner], $arguments); + } + throw new \QueryPath\Exception("No method named $name found. Possibly missing an extension."); + } + + /** + * Get an iterator for the matches in this object. + * @return Iterable + * Returns an iterator. + */ + public function getIterator() { + $i = new QueryPathIterator($this->matches); + $i->options = $this->options; + return $i; + } +} diff --git a/lib/querypath/src/QueryPath/Entities.php b/lib/querypath/src/QueryPath/Entities.php new file mode 100644 index 0000000..5670dc5 --- /dev/null +++ b/lib/querypath/src/QueryPath/Entities.php @@ -0,0 +1,162 @@ +<?php +/** + * @file + * HTML entity utilities. 
/**
 * Perform various tasks on HTML/XML entities.
 *
 * @ingroup querypath_util
 */
class Entities {

  /**
   * This is four regexes wrapped into 1. The | divides them.
   * 1: Match any char-based entity. This will go in $matches[1]
   * 2: Match any num-based entity. This will go in $matches[2]
   * 3: Match any hex-based entity. This will go in $matches[3]
   * 4: Match any ampersand that is not an entity. This goes in $matches[4]
   * This last rule will only match if none of the previous ones has already
   * matched.
   * XXX: Are octal encodings for entities acceptable?
   */
  protected static $regex = '/&([\w]+);|&#([\d]+);|&#(x[0-9a-fA-F]+);|(&)/m';

  /**
   * Replace all entities.
   *
   * This will scan a string and will attempt to replace all named
   * entities with their numeric equivalent. Numeric and hexadecimal
   * entities are kept, bare ampersands are escaped as &#38;, and named
   * entities missing from the lookup table are left untouched.
   *
   * @param string $string
   *   The string to perform replacements on.
   * @return string
   *   Returns a string that is similar to the original one, but with
   *   all entity replacements made.
   */
  public static function replaceAllEntities($string) {
    // __CLASS__ keeps the callback valid whatever namespace this class is
    // declared in.
    return preg_replace_callback(self::$regex, array(__CLASS__, 'doReplacement'), $string);
  }

  /**
   * Callback for processing replacements.
   *
   * @param array $matches
   *   The regular expression replacement array.
   * @return string
   *   The replacement text for the matched span.
   */
  protected static function doReplacement($matches) {
    // Trailing groups that did not take part in the match are not included
    // in $matches, so the element count says which alternative matched.
    $count = count($matches);
    switch ($count) {
      case 2:
        // We have a character entity. Leave it alone if it is unknown
        // rather than emitting a broken "&#;".
        $number = self::replaceEntity($matches[1]);
        return $number === NULL ? $matches[0] : '&#' . $number . ';';
      case 3:
      case 4:
        // we have a numeric (decimal or hex) entity
        return '&#' . $matches[$count-1] . ';';
      case 5:
        // We have an unescaped ampersand: escape it numerically.
        return '&#38;';
    }
    // Not reachable with the pattern above; keep the text as it was.
    return $matches[0];
  }

  /**
   * Lookup an entity string's numeric equivalent.
   *
   * @param string $entity
   *   The entity name (without '&' and ';') whose numeric value is needed.
   * @return int
   *   The integer value corresponding to the entity, or NULL if the name
   *   is not in the table.
   * @author Matt Butcher
   * @author Ryan Mahoney
   */
  public static function replaceEntity($entity) {
    return isset(self::$entity_array[$entity]) ? self::$entity_array[$entity] : NULL;
  }

  /**
   * Conversion mapper for entities in HTML.
   * Large entity conversion table. This is
   * significantly broader in range than
   * get_html_translation_table(HTML_ENTITIES).
   *
   * This code comes from Rhizome ({@link http://code.google.com/p/sinciput})
   *
   * @todo See if we can do this as a const.
   * @see get_html_translation_table()
   */
  private static $entity_array = array(
    'nbsp' => 160, 'iexcl' => 161, 'cent' => 162, 'pound' => 163,
    'curren' => 164, 'yen' => 165, 'brvbar' => 166, 'sect' => 167,
    'uml' => 168, 'copy' => 169, 'ordf' => 170, 'laquo' => 171,
    'not' => 172, 'shy' => 173, 'reg' => 174, 'macr' => 175, 'deg' => 176,
    'plusmn' => 177, 'sup2' => 178, 'sup3' => 179, 'acute' => 180,
    'micro' => 181, 'para' => 182, 'middot' => 183, 'cedil' => 184,
    'sup1' => 185, 'ordm' => 186, 'raquo' => 187, 'frac14' => 188,
    'frac12' => 189, 'frac34' => 190, 'iquest' => 191, 'Agrave' => 192,
    'Aacute' => 193, 'Acirc' => 194, 'Atilde' => 195, 'Auml' => 196,
    'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199, 'Egrave' => 200,
    'Eacute' => 201, 'Ecirc' => 202, 'Euml' => 203, 'Igrave' => 204,
    'Iacute' => 205, 'Icirc' => 206, 'Iuml' => 207, 'ETH' => 208,
    'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211, 'Ocirc' => 212,
    'Otilde' => 213, 'Ouml' => 214, 'times' => 215, 'Oslash' => 216,
    'Ugrave' => 217, 'Uacute' => 218, 'Ucirc' => 219, 'Uuml' => 220,
    'Yacute' => 221, 'THORN' => 222, 'szlig' => 223, 'agrave' => 224,
    'aacute' => 225, 'acirc' => 226, 'atilde' => 227, 'auml' => 228,
    'aring' => 229, 'aelig' => 230, 'ccedil' => 231, 'egrave' => 232,
    'eacute' => 233, 'ecirc' => 234, 'euml' => 235, 'igrave' => 236,
    'iacute' => 237, 'icirc' => 238, 'iuml' => 239, 'eth' => 240,
    'ntilde' => 241, 'ograve' => 242, 'oacute' => 243, 'ocirc' => 244,
    'otilde' => 245, 'ouml' => 246, 'divide' => 247, 'oslash' => 248,
    'ugrave' => 249, 'uacute' => 250, 'ucirc' => 251, 'uuml' => 252,
    'yacute' => 253, 'thorn' => 254, 'yuml' => 255, 'quot' => 34,
    'amp' => 38, 'lt' => 60, 'gt' => 62, 'apos' => 39, 'OElig' => 338,
    'oelig' => 339, 'Scaron' => 352, 'scaron' => 353, 'Yuml' => 376,
    'circ' => 710, 'tilde' => 732, 'ensp' => 8194, 'emsp' => 8195,
    'thinsp' => 8201, 'zwnj' => 8204, 'zwj' => 8205, 'lrm' => 8206,
    'rlm' => 8207, 'ndash' => 8211, 'mdash' => 8212, 'lsquo' => 8216,
    'rsquo' => 8217, 'sbquo' => 8218, 'ldquo' => 8220, 'rdquo' => 8221,
    'bdquo' => 8222, 'dagger' => 8224, 'Dagger' => 8225, 'permil' => 8240,
    'lsaquo' => 8249, 'rsaquo' => 8250, 'euro' => 8364, 'fnof' => 402,
    'Alpha' => 913, 'Beta' => 914, 'Gamma' => 915, 'Delta' => 916,
    'Epsilon' => 917, 'Zeta' => 918, 'Eta' => 919, 'Theta' => 920,
    'Iota' => 921, 'Kappa' => 922, 'Lambda' => 923, 'Mu' => 924, 'Nu' => 925,
    'Xi' => 926, 'Omicron' => 927, 'Pi' => 928, 'Rho' => 929, 'Sigma' => 931,
    'Tau' => 932, 'Upsilon' => 933, 'Phi' => 934, 'Chi' => 935, 'Psi' => 936,
    'Omega' => 937, 'alpha' => 945, 'beta' => 946, 'gamma' => 947,
    'delta' => 948, 'epsilon' => 949, 'zeta' => 950, 'eta' => 951,
    'theta' => 952, 'iota' => 953, 'kappa' => 954, 'lambda' => 955,
    'mu' => 956, 'nu' => 957, 'xi' => 958, 'omicron' => 959, 'pi' => 960,
    'rho' => 961, 'sigmaf' => 962, 'sigma' => 963, 'tau' => 964,
    'upsilon' => 965, 'phi' => 966, 'chi' => 967, 'psi' => 968,
    'omega' => 969, 'thetasym' => 977, 'upsih' => 978, 'piv' => 982,
    'bull' => 8226, 'hellip' => 8230, 'prime' => 8242, 'Prime' => 8243,
    'oline' => 8254, 'frasl' => 8260, 'weierp' => 8472, 'image' => 8465,
    'real' => 8476, 'trade' => 8482, 'alefsym' => 8501, 'larr' => 8592,
    'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595, 'harr' => 8596,
    'crarr' => 8629, 'lArr' => 8656, 'uArr' => 8657, 'rArr' => 8658,
    'dArr' => 8659, 'hArr' => 8660, 'forall' => 8704, 'part' => 8706,
    'exist' => 8707, 'empty' => 8709, 'nabla' => 8711, 'isin' => 8712,
    'notin' => 8713, 'ni' => 8715, 'prod' => 8719, 'sum' => 8721,
    'minus' => 8722, 'lowast' => 8727, 'radic' => 8730, 'prop' => 8733,
    'infin' => 8734, 'ang' => 8736, 'and' => 8743, 'or' => 8744, 'cap' => 8745,
    'cup' => 8746, 'int' => 8747, 'there4' => 8756, 'sim' => 8764,
    'cong' => 8773, 'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801,
    'le' => 8804, 'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836,
    'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855,
    'perp' => 8869, 'sdot' => 8901, 'lceil' => 8968, 'rceil' => 8969,
    'lfloor' => 8970, 'rfloor' => 8971, 'lang' => 9001, 'rang' => 9002,
    'loz' => 9674, 'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829,
    'diams' => 9830
  );
}
 *
 * For information on building your own extension, see {@link Extension}.
 * If you are trying to load an extension you have downloaded, chances are good that
 * all you need to do is {@link require_once} the file that contains the extension.
 *
 * @author M Butcher <matt@aleph-null.tv>
 * @license MIT
 * @see Extension
 * @see ExtensionRegistry::extend()
 */
namespace QueryPath;

/** @addtogroup querypath_extensions Extensions
 * The QueryPath extension system and bundled extensions.
 *
 * Much like jQuery, QueryPath provides a simple extension mechanism that allows
 * extensions to auto-register themselves upon being loaded. For a simple example, see
 * QPXML. For the internals, see QueryPath::Extension and QueryPath::with().
 */

/**
 * An Extension is a tool that extends the capabilities of a Query object.
 *
 * Extensions to QueryPath should implement the Extension interface. The
 * only requirement is that the extension provide a constructor that takes a
 * Query object as a parameter.
 *
 * Here is an example QueryPath extension:
 * @code
 * <?php
 * class StubExtensionOne implements \QueryPath\Extension {
 *   private $qp = NULL;
 *   public function __construct(\QueryPath\Query $qp) {
 *     $this->qp = $qp;
 *   }
 *
 *   public function stubToe() {
 *     $this->qp->find(':root')->append('<toe/>')->end();
 *     return $this->qp;
 *   }
 * }
 * ?>
 * @endcode
 * In this example, the StubExtensionOne class implements Extension.
 * The constructor stores a local copy of the Query object. This is important
 * if you are planning on fully integrating with QueryPath's Fluent Interface.
 *
 * Finally, the stubToe() function illustrates how the extension makes use of
 * QueryPath's Query object internally, and remains part of the fluent interface by returning
 * the $qp object.
 *
 * <b>Enabling an Extension</b>
 *
 * To enable an extension, call the QueryPath::enable() method.
 *
 * @code
 * <?php
 * QueryPath::enable('StubExtension');
 * ?>
 * @endcode
 *
 * More complex management of extensions can be accomplished with the
 * QueryPath::ExtensionRegistry class.
 *
 * <b>How is a QueryPath extension called?</b>
 *
 * QueryPath extensions are called like regular QueryPath functions. For
 * example, the extension above can be called like this:
 * <code>
 * qp('some.xml')->stubToe();
 * // or
 * QueryPath::with('some.xml')->stubToe();
 * </code>
 * Since it returns the Query ($qp) object, chaining is supported:
 * <code>
 * print qp('some.xml')->stubToe()->xml();
 * </code>
 * When you write your own extensions, anything that does not need to return a
 * specific value should return the Query object. Between that and the
 * extension registry, this will provide the best developer experience.
 *
 * @ingroup querypath_extensions
 */
interface Extension {
  /**
   * Build the extension around the Query object it extends.
   *
   * @param \QueryPath\Query $qp
   *   The Query object this extension instance is attached to.
   */
  public function __construct(\QueryPath\Query $qp);
}
/**
 * Provide QueryPath with additional XML tools.
 *
 * @author M Butcher <matt@aleph-null.tv>
 * @author Xander Guzman <theshadow@shadowpedia.info>
 * @license MIT
 * @see QueryPath::Extension
 * @see QueryPath::ExtensionRegistry::extend()
 * @see QPXML
 * @ingroup querypath_extensions
 */
class QPXML implements \QueryPath\Extension {

  /** The Query object this extension operates on. */
  protected $qp;

  public function __construct(\QueryPath\Query $qp) {
    $this->qp = $qp;
  }

  /**
   * Validate the current document against an XML Schema file.
   *
   * @param string $file
   *   Path to the schema, as accepted by DOMDocument::schemaValidate().
   * @throws \QueryPath\Exception
   *   If the document does not validate.
   */
  public function schema($file) {
    $doc = $this->qp->branch()->top()->get(0)->ownerDocument;

    if (!$doc->schemaValidate($file)) {
      throw new \QueryPath\Exception('Document did not validate against the schema.');
    }
  }

  /**
   * Get or set a CDATA section.
   *
   * If this is given text, it will create a CDATA section in each matched element,
   * setting that item's value to $text.
   *
   * If no parameter is passed in, this will return the first CDATA section that it
   * finds among the direct children of the matched elements.
   *
   * @param string $text
   *   The text data to insert into the current matches. If this is NULL, then the first
   *   CDATA will be returned.
   *
   * @return mixed
   *   If $text is not NULL, this will return a {@link QueryPath}. Otherwise, it will
   *   return a string. If no CDATA is found, this will return NULL.
   * @see comment()
   * @see QueryPath::text()
   * @see QueryPath::html()
   */
  public function cdata($text = NULL) {
    if (isset($text)) {
      // Add this text as CDATA in the current elements.
      foreach ($this->qp->get() as $element) {
        $cdata = $element->ownerDocument->createCDATASection($text);
        $element->appendChild($cdata);
      }
      return $this->qp;
    }

    // Look for CDATA sections.
    foreach ($this->qp->get() as $ele) {
      foreach ($ele->childNodes as $node) {
        if ($node->nodeType == XML_CDATA_SECTION_NODE) {
          // Return first match.
          return $node->textContent;
        }
      }
    }
    // Nothing found
    return NULL;
  }

  /**
   * Get or set a comment.
   *
   * This function is used to get or set comments in an XML or HTML document.
   * If a $text value is passed in (and is not NULL), then this will add a comment
   * (with the value $text) to every match in the set.
   *
   * If no text is passed in, this will return the first comment among the
   * direct children of the matches. If no comments are found, NULL will be
   * returned.
   *
   * @param string $text
   *   The text of the comment. If set, a new comment will be created in every item
   *   wrapped by the current {@link QueryPath}.
   * @return mixed
   *   If $text is set, this will return a {@link QueryPath}. If no text is set, this
   *   will search for a comment and attempt to return the string value of the first
   *   comment it finds. If no comment is found, NULL will be returned.
   * @see cdata()
   */
  public function comment($text = NULL) {
    if (isset($text)) {
      foreach ($this->qp->get() as $element) {
        $comment = $element->ownerDocument->createComment($text);
        $element->appendChild($comment);
      }
      return $this->qp;
    }
    foreach ($this->qp->get() as $ele) {
      foreach ($ele->childNodes as $node) {
        if ($node->nodeType == XML_COMMENT_NODE) {
          // Return first match.
          return $node->textContent;
        }
      }
    }
    return NULL;
  }

  /**
   * Get or set a processing instruction.
   *
   * With $text set, a processing instruction with target $prefix and content
   * $text is appended to every match. Without $text, the content of the
   * first processing instruction among the direct children of the matches
   * is returned; if $prefix is given, only instructions with that target
   * are considered.
   *
   * @param string $prefix
   *   The processing instruction target.
   * @param string $text
   *   The content for a new processing instruction.
   * @return mixed
   *   The Query object when setting; otherwise the content string, or NULL
   *   if nothing matched.
   */
  public function pi($prefix = NULL, $text = NULL) {
    if (isset($text)) {
      foreach ($this->qp->get() as $element) {
        $instruction = $element->ownerDocument->createProcessingInstruction($prefix, $text);
        $element->appendChild($instruction);
      }
      return $this->qp;
    }
    foreach ($this->qp->get() as $ele) {
      foreach ($ele->childNodes as $node) {
        if ($node->nodeType != XML_PI_NODE) {
          continue;
        }
        // Processing instructions expose their name as "target"; they have
        // no tagName property, so comparing against tagName never matched.
        if (!isset($prefix) || $node->target == $prefix) {
          return $node->textContent;
        }
      }
    }
    return NULL;
  }

  /**
   * Serialize the whole document as XML.
   *
   * @return string
   *   The result of saveXml() on the Query object's document.
   */
  public function toXml() {
    return $this->qp->document()->saveXml();
  }

  /**
   * Create a NIL element.
   *
   * @param string $text
   *   The element name.
   * @param string $value
   *   Any truthy value sets xsi:nil to 'true', anything else to 'false'.
   * @return object
   *   The new element, as returned by createElement().
   */
  public function createNilElement($text, $value) {
    $value = ($value)? 'true':'false';
    $element = $this->qp->createElement($text);
    $element->attr('xsi:nil', $value);
    return $element;
  }

  /**
   * Create an element with the given namespace.
   *
   * The element is created in the document of the first match. If no
   * namespace URI is given and the name has a prefix, the prefix is looked
   * up in that document.
   *
   * @param string $text
   *   The (possibly prefixed) element name.
   * @param string $nsUri
   *   The namespace URI for the given element.
   * @return \QueryPath\DOMQuery
   *   A new object wrapping the element; NULL if $text is NULL or there
   *   are no matches.
   * @throws \QueryPath\Exception
   *   If the prefix cannot be resolved to a namespace URI.
   */
  public function createElement($text, $nsUri = null) {
    if (isset ($text)) {
      foreach ($this->qp->get() as $element) {
        if ($nsUri === null && strpos($text, ':') !== false) {
          // Part before the first colon. (array_shift(explode()) passed a
          // function result by reference, which PHP complains about.)
          $ns = strstr($text, ':', TRUE);
          $nsUri = $element->ownerDocument->lookupNamespaceURI($ns);

          if ($nsUri === null) {
            throw new \QueryPath\Exception("Undefined namespace for: " . $text);
          }
        }

        if ($nsUri !== null) {
          $node = $element->ownerDocument->createElementNS(
            $nsUri,
            $text
          );
        } else {
          $node = $element->ownerDocument->createElement($text);
        }
        // Only the first match is used to create the element.
        return \QueryPath::with($node);
      }
    }
    return;
  }

  /**
   * Append an element.
   *
   * A new element named $text is created and appended to every match.
   *
   * @param string $text
   *   The element name.
   * @return \QueryPath\DOMQuery
   *   The Query object this extension is attached to.
   */
  public function appendElement($text) {
    if (isset ($text)) {
      foreach ($this->qp->get() as $element) {
        $node = $this->qp->createElement($text);
        \QueryPath::with($element)->append($node);
      }
    }
    return $this->qp;
  }
}
/**
 * Provide tools for running XSL Transformation (XSLT) on a document.
 *
 * This extension provides the {@link QPXSL::xslt()} function, which transforms
 * a source XML document into another XML document according to the rules in
 * an XSLT document.
 *
 * This QueryPath extension can be used as follows:
 * <code>
 * <?php
 * require 'QueryPath/QueryPath.php';
 * require 'QueryPath/Extension/QPXSL.php';
 *
 * qp('src.xml')->xslt('stylesheet.xml')->writeXML();
 * ?>
 * </code>
 *
 * This will transform src.xml according to the XSLT rules in
 * stylesheet.xml. The results are returned as a QueryPath object, which
 * is written to XML using {@link QueryPath::writeXML()}.
 *
 * @ingroup querypath_extensions
 */
class QPXSL implements \QueryPath\Extension {

  /** The Query object wrapping the source document. */
  protected $src = NULL;

  public function __construct(\QueryPath\Query $qp) {
    $this->src = $qp;
  }

  /**
   * Given an XSLT stylesheet, run a transformation.
   *
   * This will attempt to read the provided stylesheet and then
   * execute it on the current source document.
   *
   * @param mixed $style
   *   This takes a QueryPath object or <em>any</em> of the types that the
   *   {@link qp()} function can take.
   * @return QueryPath
   *   A QueryPath object wrapping the transformed document. Note that this is a
   *   <em>different</em> document than the original. As such, it has no history.
   *   You cannot call {@link QueryPath::end()} to undo a transformation. (However,
   *   the original source document will remain unchanged.)
   */
  public function xslt($style) {
    // The previous unqualified "instanceof QueryPath" resolved to a class
    // inside this extension namespace and therefore never matched.
    if (!($style instanceof \QueryPath\DOMQuery)) {
      $style = \QueryPath::with($style);
    }
    $sourceDoc = self::documentOf($this->src->top()->get(0));
    $styleDoc = self::documentOf($style->get(0));
    $processor = new \XSLTProcessor();
    $processor->importStylesheet($styleDoc);
    return \QueryPath::with($processor->transformToDoc($sourceDoc));
  }

  /**
   * Resolve the document a node belongs to.
   *
   * A DOMDocument has no ownerDocument (it is NULL), so the document itself
   * is returned in that case.
   */
  private static function documentOf($node) {
    return $node instanceof \DOMDocument ? $node : $node->ownerDocument;
  }
}
/**
 * A registry for QueryPath extensions.
 *
 * Extension classes announce themselves through extend(). The registry
 * remembers the registered class names and which class provides which
 * method name, and can build one instance of every registered extension
 * for a given Query object.
 *
 * To enable an extension (the easy way), use QueryPath::enable(). This
 * class provides lower-level interaction with the extension mechanism.
 *
 * @ingroup querypath_extensions
 */
class ExtensionRegistry {
  /**
   * Internal flag indicating whether or not the registry should
   * be used for automatic extension loading. If this is false, then
   * implementations should not automatically load extensions.
   */
  public static $useRegistry = TRUE;

  /**
   * Registered extension class names, in registration order.
   */
  protected static $extensionRegistry = array();

  /**
   * Map of method name to the class name that provides it.
   */
  protected static $extensionMethodRegistry = array();

  /**
   * Extend a Query with the given extension class.
   *
   * Every method the class declares or inherits is mapped to the class.
   * A method name that was already registered is taken over by this class.
   *
   * @param string $classname
   *   The extension class to register.
   */
  public static function extend($classname) {
    self::$extensionRegistry[] = $classname;
    $reflection = new \ReflectionClass($classname);
    foreach ($reflection->getMethods() as $reflectedMethod) {
      self::$extensionMethodRegistry[$reflectedMethod->name] = $classname;
    }
  }

  /**
   * Check to see if a method is known.
   *
   * @param string $name
   *   The name of the method to search for.
   * @return boolean
   *   TRUE if a registered extension provides the method, FALSE otherwise.
   */
  public static function hasMethod($name) {
    $known = isset(self::$extensionMethodRegistry[$name]);
    return $known;
  }

  /**
   * Check to see if the given extension class is registered.
   *
   * @param string $name
   *   The name of the class.
   * @return boolean
   *   TRUE if the class is registered, FALSE otherwise.
   */
  public static function hasExtension($name) {
    $registered = self::$extensionRegistry;
    return in_array($name, $registered);
  }

  /**
   * Get the class that a given method belongs to.
   *
   * Note that if two extensions are registered that contain the same
   * method name, the last one registered will be the only one recognized.
   *
   * @param string $name
   *   The name of the method.
   * @return string
   *   The name of the class.
   */
  public static function getMethodClass($name) {
    $owner = self::$extensionMethodRegistry[$name];
    return $owner;
  }

  /**
   * Get extensions for the given Query object.
   *
   * A new instance of every registered extension class is created, each
   * receiving $qp as its constructor argument. Generally, this is intended
   * to be used internally.
   *
   * @param Query $qp
   *   The Query into which the extensions should be registered.
   * @return array
   *   An associative array of classnames to instances.
   */
  public static function getExtensions(Query $qp) {
    $instances = array();
    foreach (self::$extensionRegistry as $extensionClass) {
      $instance = new $extensionClass($qp);
      $instances[$extensionClass] = $instance;
    }
    return $instances;
  }

  /**
   * List the registered extension class names.
   *
   * @return array
   *   The class names, in registration order.
   */
  public static function extensionNames() {
    return self::$extensionRegistry;
  }

  /**
   * Enable or disable automatic extension loading.
   *
   * If extension autoloading is disabled, then QueryPath will not
   * automatically load all registered extensions when a new Query
   * object is created using qp().
   *
   * @param boolean $boolean
   *   The new value of the $useRegistry flag.
   */
  public static function autoloadExtensions($boolean = TRUE) {
    self::$useRegistry = $boolean;
  }
}
/**
 * Indicates that an input/output exception has occurred.
 *
 * @ingroup querypath_core
 */
class IOException extends \QueryPath\ParseException {
  /**
   * Turn a PHP error into an IOException.
   *
   * The parameter list has the shape of a set_error_handler() callback.
   *
   * @param int $code
   *   The error level.
   * @param string $str
   *   The error message.
   * @param string $file
   *   The file in which the error was raised.
   * @param int $line
   *   The line on which the error was raised.
   * @param mixed $cxt
   *   The legacy error context. Unused. Optional, because PHP 8 no longer
   *   passes a fifth argument to error handlers; a required parameter here
   *   would make every handler invocation fail.
   * @throws \QueryPath\IOException
   *   Always.
   */
  public static function initializeFromError($code, $str, $file, $line, $cxt = NULL) {
    $class = __CLASS__;
    throw new $class($str, $code, $file, $line);
  }
}
Instead, it provides a + * central tool for developers to override options set by QueryPath. + * When a QueryPath object is created, it will evaluate options in the + * following order: + * + * - Options passed into qp() have highest priority. + * - Options in QueryPath::Options (this class) have the next highest priority. + * - If the option is not specified elsewhere, QueryPath will use its own defaults. + * + * @see qp() + * @see QueryPath::Options::set() + * @ingroup querypath_util + */ +class Options { + /** + * This is the static options array. + * + * Use {@link set()}, {@link get()}, and {@link merge()} to + * read or modify this array. + */ + static $options = array(); + /** + * Set the default options. + * + * The passed-in array will be used as the default options list. + * Previously stored options are discarded, not merged; see + * {@link merge()} to combine arrays instead. + * + * @param array $array + * An associative array of options. + */ + static function set($array) { + self::$options = $array; + } + /** + * Get the default options. + * + * Get all options currently set as default. + * + * @return array + * An array of options. Note that only explicitly set options are + * returned. {@link QueryPath} defines default options which are not + * stored in this object. + */ + static function get() { + return self::$options; + } + /** + * Merge the provided array with existing options. + * + * On duplicate keys, the value in $array will overwrite the + * value stored in the options. + * + * This uses the array union operator, so keys found only in the + * stored options are kept unchanged. + * + * @param array $array + * Associative array of options to merge into the existing options. + */ + static function merge($array) { + self::$options = $array + self::$options; + } + /** + * Returns true if the specified key is already overridden in this object. + * + * The check uses array_key_exists(), so a key whose stored value is NULL + * still counts as present. + * + * @param string $key + * The key to search for. + * @return boolean + * TRUE if the key exists in the stored options, FALSE otherwise.
+ */ + static function has($key) { + return array_key_exists($key, self::$options); + } +} diff --git a/lib/querypath/src/QueryPath/ParseException.php b/lib/querypath/src/QueryPath/ParseException.php new file mode 100644 index 0000000..7da7beb --- /dev/null +++ b/lib/querypath/src/QueryPath/ParseException.php @@ -0,0 +1,44 @@ +<?php +/** + * @file + * Query path parsing exception. + */ + +namespace QueryPath; + +/** + * Exception indicating that a parser has failed to parse a file. + * + * This will report parser warnings as well as parser errors. It should only be + * thrown, though, under error conditions. + * + * @ingroup querypath_core + */ +class ParseException extends \QueryPath\Exception { + const ERR_MSG_FORMAT = 'Parse error in %s on line %d column %d: %s (%d)'; + const WARN_MSG_FORMAT = 'Parser warning in %s on line %d column %d: %s (%d)'; + // trigger_error + /** + * Build the exception message and construct the exception. + * + * Every entry returned by libxml_get_errors() is formatted with + * WARN_MSG_FORMAT (for LIBXML_ERR_WARNING) or ERR_MSG_FORMAT (for any + * other level). The formatted entries are joined with newlines and + * appended to $msg, on a new line when $msg is not empty. If $file is + * set, " (FILE: LINE)" is appended as well, with the ": LINE" part only + * when $line is set. + * + * @param string $msg + * The base message. + * @param int $code + * The exception code, passed through to the parent constructor. + * @param string $file + * Optional file name to append to the message. + * @param int $line + * Optional line number to append after the file name. + */ + public function __construct($msg = '', $code = 0, $file = NULL, $line = NULL) { + + $msgs = array(); + foreach(libxml_get_errors() as $err) { + $format = $err->level == LIBXML_ERR_WARNING ? self::WARN_MSG_FORMAT : self::ERR_MSG_FORMAT; + $msgs[] = sprintf($format, $err->file, $err->line, $err->column, $err->message, $err->code); + } + $report = implode("\n", $msgs); + /* Start the libxml report on its own line instead of fusing it onto + the end of the caller's message. */ + if ($report !== '' && (string) $msg !== '') { + $msg .= "\n"; + } + $msg .= $report; + + if (isset($file)) { + $msg .= ' (' . $file; + if (isset($line)) $msg .= ': ' . 
$line; + $msg .= ')'; + } + + parent::__construct($msg, $code); + } + + /** + * Throw a ParseException built from the given error details. + * + * __CLASS__ always names this class, so a subclass that wants its own + * type thrown has to override this method (IOException does). + * + * @param int $code + * Passed to the constructor as the exception code. + * @param string $str + * Passed to the constructor as the message. + * @param string $file + * Passed to the constructor as the file name. + * @param int $line + * Passed to the constructor as the line number. + * @param mixed $cxt + * Accepted but not used. + * @throws \QueryPath\ParseException + * Always. + */ + public static function initializeFromError($code, $str, $file, $line, $cxt) { + //printf("\n\nCODE: %s %s\n\n", $code, $str); + $class = __CLASS__; + throw new $class($str, $code, $file, $line); + } +} diff --git a/lib/querypath/src/QueryPath/Query.php b/lib/querypath/src/QueryPath/Query.php new file mode 100644 index 0000000..81b5cae --- /dev/null +++ b/lib/querypath/src/QueryPath/Query.php @@ -0,0 +1,12 @@ +<?php +namespace QueryPath; +/** + * Interface for query objects. + * + * Declares the constructor signature and the selector-taking methods + * find(), top(), next(), prev(), siblings(), parent() and children(). + */ +interface Query { + public function __construct($document = NULL, $selector = NULL, $options = NULL); + public function find($selector); + public function top($selector = NULL); + public function next($selector = NULL); + public function prev($selector = NULL); + public function siblings($selector = NULL); + public function parent($selector = NULL); + public function children($selector = NULL); +} diff --git a/lib/querypath/src/QueryPath/QueryPathIterator.php b/lib/querypath/src/QueryPath/QueryPathIterator.php new file mode 100644 index 0000000..be2e66f --- /dev/null +++ b/lib/querypath/src/QueryPath/QueryPathIterator.php @@ -0,0 +1,33 @@ +<?php +/** + * @file + * + * Utility iterator for QueryPath. + */ +namespace QueryPath; + +/** + * An iterator for QueryPath. + * + * This provides iterator support for QueryPath. You do not need to construct + * a QueryPathIterator. QueryPath does this when its QueryPath::getIterator() + * method is called. + * + * @ingroup querypath_util + */ +class QueryPathIterator extends \IteratorIterator { + public $options = array(); + private $qp = NULL; + + /** + * Return a QueryPath object wrapping the current item. + * + * The first call creates the object with \QueryPath::with(), passing the + * inner iterator's current item and $this->options. Later calls reuse + * that same object and replace its matches with a new SplObjectStorage + * holding only the current item, so every iteration returns the same + * instance. + * + * @return mixed + * The object produced by \QueryPath::with(). + */ + public function current() { + if (!isset($this->qp)) { + $this->qp = \QueryPath::with(parent::current(), NULL, $this->options); + } + else { + $splos = new \SplObjectStorage(); + $splos->attach(parent::current()); + $this->qp->setMatches($splos); + } + return $this->qp; + } +} |