From 51609351f2c4b5082b7e6f0744cd3811c325303f Mon Sep 17 00:00:00 2001 From: emkael Date: Tue, 11 Oct 2016 14:01:29 +0200 Subject: * initial template --- lib/querypath/QueryPath.php | 4543 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 4543 insertions(+) create mode 100644 lib/querypath/QueryPath.php (limited to 'lib/querypath/QueryPath.php') diff --git a/lib/querypath/QueryPath.php b/lib/querypath/QueryPath.php new file mode 100644 index 0000000..2585ba6 --- /dev/null +++ b/lib/querypath/QueryPath.php @@ -0,0 +1,4543 @@ +'); + * $qp->append('')->writeHTML(); + * ?> + * @endcode + * + * The above would print (formatted for readability): + * @code + * + * + * + * + * + * + * + * + * @endcode + * + * To learn about the functions available to a Query Path object, + * see {@link QueryPath}. The {@link qp()} function is used to build + * new QueryPath objects. The documentation for that function explains the + * wealth of arguments that the function can take. + * + * Included with the source code for QueryPath is a complete set of unit tests + * as well as some example files. Those are good resources for learning about + * how to apply QueryPath's tools. The full API documentation can be generated + * from these files using PHPDocumentor. + * + * If you are interested in building extensions for QueryParser, see the + * {@link QueryPathExtender} class. There, you will find information on adding + * your own tools to QueryPath. + * + * QueryPath also comes with a full CSS 3 selector parser implementation. If + * you are interested in reusing that in other code, you will want to start + * with {@link CssEventHandler.php}, which is the event interface for the parser. + * + * All of the code in QueryPath is licensed under either the LGPL or an MIT-like + * license (you may choose which you prefer). All of the code is Copyright, 2009 + * by Matt Butcher. + * + * @author M Butcher + * @license http://opensource.org/licenses/lgpl-2.1.php The GNU Lesser GPL (LGPL) or an MIT-like license. + * @see QueryPath + * @see qp() + * @see http://querypath.org The QueryPath home page. + * @see http://api.querypath.org An online version of the API docs. + * @see http://technosophos.com For how-tos and examples. + * @copyright Copyright (c) 2009, Matt Butcher. + * @version 2.1.2 + * + */ + +/** @addtogroup querypath_core Core API + * Core classes and functions for QueryPath. + * + * These are the classes, objects, and functions that developers who use QueryPath + * are likely to use. The qp() and htmlqp() functions are the best place to start, + * while most of the frequently used methods are part of the QueryPath object. + */ + +/** @addtogroup querypath_util Utilities + * Utility classes for QueryPath. + * + * These classes add important, but less-often used features to QueryPath. Some of + * these are used transparently (QueryPathIterator). Others you can use directly in your + * code (QueryPathEntities). + */ + +/* * @namespace QueryPath + * The core classes that compose QueryPath. + * + * The QueryPath classes contain the brunt of the QueryPath code. If you are + * interested in working with just the CSS engine, you may want to look at CssEventHandler, + * which can be used without the rest of QueryPath. If you are interested in looking + * carefully at QueryPath's implementation details, then the QueryPath class is where you + * should begin. If you are interested in writing extensions, than you may want to look at + * QueryPathExtension, and also at some of the simple extensions, such as QPXML. + */ + +/** + * Regular expression for checking whether a string looks like XML. + * @deprecated This is no longer used in QueryPath. + */ +define('ML_EXP','/^[^<]*(<(.|\s)+>)[^>]*$/'); + +/** + * The CssEventHandler interfaces with the CSS parser. + */ +require_once 'CssEventHandler.php'; +/** + * The extender is used to provide support for extensions. + */ +require_once 'QueryPathExtension.php'; + +/** + * Build a new Query Path. + * This builds a new Query Path object. The new object can be used for + * reading, search, and modifying a document. + * + * While it is permissible to directly create new instances of a QueryPath + * implementation, it is not advised. Instead, you should use this function + * as a factory. + * + * Example: + * @code + * '); // From HTML or XML + * qp(QueryPath::XHTML_STUB); // From a basic HTML document. + * qp(QueryPath::XHTML_STUB, 'title'); // Create one from a basic HTML doc and position it at the title element. + * + * // Most of the time, methods are chained directly off of this call. + * qp(QueryPath::XHTML_STUB, 'body')->append('

Title

')->addClass('body-class'); + * ?> + * @endcode + * + * This function is used internally by QueryPath. Anything that modifies the + * behavior of this function may also modify the behavior of common QueryPath + * methods. + * + * Types of documents that QueryPath can support + * + * qp() can take any of these as its first argument: + * + * - A string of XML or HTML (See {@link XHTML_STUB}) + * - A path on the file system or a URL + * - A {@link DOMDocument} object + * - A {@link SimpleXMLElement} object. + * - A {@link DOMNode} object. + * - An array of {@link DOMNode} objects (generally {@link DOMElement} nodes). + * - Another {@link QueryPath} object. + * + * Keep in mind that most features of QueryPath operate on elements. Other + * sorts of DOMNodes might not work with all features. + * + * Supported Options + * - context: A stream context object. This is used to pass context info + * to the underlying file IO subsystem. + * - encoding: A valid character encoding, such as 'utf-8' or 'ISO-8859-1'. + * The default is system-dependant, typically UTF-8. Note that this is + * only used when creating new documents, not when reading existing content. + * (See convert_to_encoding below.) + * - parser_flags: An OR-combined set of parser flags. The flags supported + * by the DOMDocument PHP class are all supported here. + * - omit_xml_declaration: Boolean. If this is TRUE, then certain output + * methods (like {@link QueryPath::xml()}) will omit the XML declaration + * from the beginning of a document. + * - replace_entities: Boolean. If this is TRUE, then any of the insertion + * functions (before(), append(), etc.) will replace named entities with + * their decimal equivalent, and will replace un-escaped ampersands with + * a numeric entity equivalent. + * - ignore_parser_warnings: Boolean. If this is TRUE, then E_WARNING messages + * generated by the XML parser will not cause QueryPath to throw an exception. + * This is useful when parsing + * badly mangled HTML, or when failure to find files should not result in + * an exception. By default, this is FALSE -- that is, parsing warnings and + * IO warnings throw exceptions. + * - convert_to_encoding: Use the MB library to convert the document to the + * named encoding before parsing. This is useful for old HTML (set it to + * iso-8859-1 for best results). If this is not supplied, no character set + * conversion will be performed. See {@link mb_convert_encoding()}. + * (QueryPath 1.3 and later) + * - convert_from_encoding: If 'convert_to_encoding' is set, this option can be + * used to explicitly define what character set the source document is using. + * By default, QueryPath will allow the MB library to guess the encoding. + * (QueryPath 1.3 and later) + * - strip_low_ascii: If this is set to TRUE then markup will have all low ASCII + * characters (<32) stripped out before parsing. This is good in cases where + * icky HTML has (illegal) low characters in the document. + * - use_parser: If 'xml', Parse the document as XML. If 'html', parse the + * document as HTML. Note that the XML parser is very strict, while the + * HTML parser is more lenient, but does enforce some of the DTD/Schema. + * By default, QueryPath autodetects the type. + * - escape_xhtml_js_css_sections: XHTML needs script and css sections to be + * escaped. Yet older readers do not handle CDATA sections, and comments do not + * work properly (for numerous reasons). By default, QueryPath's *XHTML methods + * will wrap a script body with a CDATA declaration inside of C-style comments. + * If you want to change this, you can set this option with one of the + * JS_CSS_ESCAPE_* constants, or you can write your own. + * - QueryPath_class: (ADVANCED) Use this to set the actual classname that + * {@link qp()} loads as a QueryPath instance. It is assumed that the + * class is either {@link QueryPath} or a subclass thereof. See the test + * cases for an example. + * + * @ingroup querypath_core + * @param mixed $document + * A document in one of the forms listed above. + * @param string $string + * A CSS 3 selector. + * @param array $options + * An associative array of options. Currently supported options are listed above. + * @return QueryPath + */ +function qp($document = NULL, $string = NULL, $options = array()) { + + $qpClass = isset($options['QueryPath_class']) ? $options['QueryPath_class'] : 'QueryPath'; + + $qp = new $qpClass($document, $string, $options); + return $qp; +} + +/** + * A special-purpose version of {@link qp()} designed specifically for HTML. + * + * XHTML (if valid) can be easily parsed by {@link qp()} with no problems. However, + * because of the way that libxml handles HTML, there are several common steps that + * need to be taken to reliably parse non-XML HTML documents. This function is + * a convenience tool for configuring QueryPath to parse HTML. + * + * The following options are automatically set unless overridden: + * - ignore_parser_warnings: TRUE + * - convert_to_encoding: ISO-8859-1 (the best for the HTML parser). + * - convert_from_encoding: auto (autodetect encoding) + * - use_parser: html + * + * Parser warning messages are also suppressed, so if the parser emits a warning, + * the application will not be notified. This is equivalent to + * calling @code@qp()@endcode. + * + * Warning: Character set conversions will only work if the Multi-Byte (mb) library + * is installed and enabled. This is usually enabled, but not always. + * + * @ingroup querypath_core + * @see qp() + */ +function htmlqp($document = NULL, $selector = NULL, $options = array()) { + + // Need a way to force an HTML parse instead of an XML parse when the + // doctype is XHTML, since many XHTML documents are not valid XML + // (because of coding errors, not by design). + + $options += array( + 'ignore_parser_warnings' => TRUE, + 'convert_to_encoding' => 'ISO-8859-1', + 'convert_from_encoding' => 'auto', + //'replace_entities' => TRUE, + 'use_parser' => 'html', + // This is stripping actually necessary low ASCII. + //'strip_low_ascii' => TRUE, + ); + return @qp($document, $selector, $options); +} + +/** + * The Query Path object is the primary tool in this library. + * + * To create a new Query Path, use the {@link qp()} function. + * + * If you are new to these documents, start at the {@link QueryPath.php} page. + * There you will find a quick guide to the tools contained in this project. + * + * A note on serialization: QueryPath uses DOM classes internally, and those + * do not serialize well at all. In addition, QueryPath may contain many + * extensions, and there is no guarantee that extensions can serialize. The + * moral of the story: Don't serialize QueryPath. + * + * @see qp() + * @see QueryPath.php + * @ingroup querypath_core + */ +class QueryPath implements IteratorAggregate, Countable { + + /** + * The version string for this version of QueryPath. + * + * Standard releases will be of the following form: .[.][-STABILITY]. + * + * Examples: + * - 2.0 + * - 2.1.1 + * - 2.0-alpha1 + * + * Developer releases will always be of the form dev-. + * + * @since 2.0 + */ + const VERSION = '2.1.2'; + + /** + * This is a stub HTML 4.01 document. + * + * Using {@link QueryPath::XHTML_STUB} is preferred. + * + * This is primarily for generating legacy HTML content. Modern web applications + * should use {@link QueryPath::XHTML_STUB}. + * + * Use this stub with the HTML familiy of methods ({@link html()}, + * {@link writeHTML()}, {@link innerHTML()}). + */ + const HTML_STUB = ' + + + + Untitled + + + '; + + /** + * This is a stub XHTML document. + * + * Since XHTML is an XML format, you should use XML functions with this document + * fragment. For example, you should use {@link xml()}, {@link innerXML()}, and + * {@link writeXML()}. + * + * This can be passed into {@link qp()} to begin a new basic HTML document. + * + * Example: + * @code + * $qp = qp(QueryPath::XHTML_STUB); // Creates a new XHTML document + * $qp->writeXML(); // Writes the document as well-formed XHTML. + * @endcode + * @since 2.0 + */ + const XHTML_STUB = ' + + + + + Untitled + + + '; + + /** + * Default parser flags. + * + * These are flags that will be used if no global or local flags override them. + * @since 2.0 + */ + const DEFAULT_PARSER_FLAGS = NULL; + + const JS_CSS_ESCAPE_CDATA = '\\1'; + const JS_CSS_ESCAPE_CDATA_CCOMMENT = '/* \\1 */'; + const JS_CSS_ESCAPE_CDATA_DOUBLESLASH = '// \\1'; + const JS_CSS_ESCAPE_NONE = ''; + + //const IGNORE_ERRORS = 1544; //E_NOTICE | E_USER_WARNING | E_USER_NOTICE; + private $errTypes = 771; //E_ERROR; | E_USER_ERROR; + + /** + * The base DOMDocument. + */ + protected $document = NULL; + private $options = array( + 'parser_flags' => NULL, + 'omit_xml_declaration' => FALSE, + 'replace_entities' => FALSE, + 'exception_level' => 771, // E_ERROR | E_USER_ERROR | E_USER_WARNING | E_WARNING + 'ignore_parser_warnings' => FALSE, + 'escape_xhtml_js_css_sections' => self::JS_CSS_ESCAPE_CDATA_CCOMMENT, + ); + /** + * The array of matches. + */ + protected $matches = array(); + /** + * The last array of matches. + */ + protected $last = array(); // Last set of matches. + private $ext = array(); // Extensions array. + + /** + * The number of current matches. + * + * @see count() + */ + public $length = 0; + + /** + * Constructor. + * + * This should not be called directly. Use the {@link qp()} factory function + * instead. + * + * @param mixed $document + * A document-like object. + * @param string $string + * A CSS 3 Selector + * @param array $options + * An associative array of options. + * @see qp() + */ + public function __construct($document = NULL, $string = NULL, $options = array()) { + $string = trim($string); + $this->options = $options + QueryPathOptions::get() + $this->options; + + $parser_flags = isset($options['parser_flags']) ? $options['parser_flags'] : self::DEFAULT_PARSER_FLAGS; + if (!empty($this->options['ignore_parser_warnings'])) { + // Don't convert parser warnings into exceptions. + $this->errTypes = 257; //E_ERROR | E_USER_ERROR; + } + elseif (isset($this->options['exception_level'])) { + // Set the error level at which exceptions will be thrown. By default, + // QueryPath will throw exceptions for + // E_ERROR | E_USER_ERROR | E_WARNING | E_USER_WARNING. + $this->errTypes = $this->options['exception_level']; + } + + // Empty: Just create an empty QP. + if (empty($document)) { + $this->document = isset($this->options['encoding']) ? new DOMDocument('1.0', $this->options['encoding']) : new DOMDocument(); + $this->setMatches(new SplObjectStorage()); + } + // Figure out if document is DOM, HTML/XML, or a filename + elseif (is_object($document)) { + + if ($document instanceof QueryPath) { + $this->matches = $document->get(NULL, TRUE); + if ($this->matches->count() > 0) + $this->document = $this->getFirstMatch()->ownerDocument; + } + elseif ($document instanceof DOMDocument) { + $this->document = $document; + //$this->matches = $this->matches($document->documentElement); + $this->setMatches($document->documentElement); + } + elseif ($document instanceof DOMNode) { + $this->document = $document->ownerDocument; + //$this->matches = array($document); + $this->setMatches($document); + } + elseif ($document instanceof SimpleXMLElement) { + $import = dom_import_simplexml($document); + $this->document = $import->ownerDocument; + //$this->matches = array($import); + $this->setMatches($import); + } + elseif ($document instanceof SplObjectStorage) { + if ($document->count() == 0) { + throw new QueryPathException('Cannot initialize QueryPath from an empty SplObjectStore'); + } + $this->matches = $document; + $this->document = $this->getFirstMatch()->ownerDocument; + } + else { + throw new QueryPathException('Unsupported class type: ' . get_class($document)); + } + } + elseif (is_array($document)) { + //trigger_error('Detected deprecated array support', E_USER_NOTICE); + if (!empty($document) && $document[0] instanceof DOMNode) { + $found = new SplObjectStorage(); + foreach ($document as $item) $found->attach($item); + //$this->matches = $found; + $this->setMatches($found); + $this->document = $this->getFirstMatch()->ownerDocument; + } + } + elseif ($this->isXMLish($document)) { + // $document is a string with XML + $this->document = $this->parseXMLString($document); + $this->setMatches($this->document->documentElement); + } + else { + + // $document is a filename + $context = empty($options['context']) ? NULL : $options['context']; + $this->document = $this->parseXMLFile($document, $parser_flags, $context); + $this->setMatches($this->document->documentElement); + } + + // Do a find if the second param was set. + if (isset($string) && strlen($string) > 0) { + $this->find($string); + } + } + + /** + * A static function for transforming data into a Data URL. + * + * This can be used to create Data URLs for injection into CSS, JavaScript, or other + * non-XML/HTML content. If you are working with QP objects, you may want to use + * {@link dataURL()} instead. + * + * @param mixed $data + * The contents to inject as the data. The value can be any one of the following: + * - A URL: If this is given, then the subsystem will read the content from that URL. THIS + * MUST BE A FULL URL, not a relative path. + * - A string of data: If this is given, then the subsystem will encode the string. + * - A stream or file handle: If this is given, the stream's contents will be encoded + * and inserted as data. + * (Note that we make the assumption here that you would never want to set data to be + * a URL. If this is an incorrect assumption, file a bug.) + * @param string $mime + * The MIME type of the document. + * @param resource $context + * A valid context. Use this only if you need to pass a stream context. This is only necessary + * if $data is a URL. (See {@link stream_context_create()}). + * @return + * An encoded data URL. + */ + public static function encodeDataURL($data, $mime = 'application/octet-stream', $context = NULL) { + if (is_resource($data)) { + $data = stream_get_contents($data); + } + elseif (filter_var($data, FILTER_VALIDATE_URL)) { + $data = file_get_contents($data, FALSE, $context); + } + + $encoded = base64_encode($data); + + return 'data:' . $mime . ';base64,' . $encoded; + } + + /** + * Get the effective options for the current QueryPath object. + * + * This returns an associative array of all of the options as set + * for the current QueryPath object. This includes default options, + * options directly passed in via {@link qp()} or the constructor, + * an options set in the {@link QueryPathOptions} object. + * + * The order of merging options is this: + * - Options passed in using {@link qp()} are highest priority, and will + * override other options. + * - Options set with {@link QueryPathOptions} will override default options, + * but can be overridden by options passed into {@link qp()}. + * - Default options will be used when no overrides are present. + * + * This function will return the options currently used, with the above option + * overriding having been calculated already. + * + * @return array + * An associative array of options, calculated from defaults and overridden + * options. + * @see qp() + * @see QueryPathOptions::set() + * @see QueryPathOptions::merge() + * @since 2.0 + */ + public function getOptions() { + return $this->options; + } + + /** + * Select the root element of the document. + * + * This sets the current match to the document's root element. For + * practical purposes, this is the same as: + * @code + * qp($someDoc)->find(':root'); + * @endcode + * However, since it doesn't invoke a parser, it has less overhead. It also + * works in cases where the QueryPath has been reduced to zero elements (a + * case that is not handled by find(':root') because there is no element + * whose root can be found). + * + * @param string $selector + * A selector. If this is supplied, QueryPath will navigate to the + * document root and then run the query. (Added in QueryPath 2.0 Beta 2) + * @return QueryPath + * The QueryPath object, wrapping the root element (document element) + * for the current document. + */ + public function top($selector = NULL) { + $this->setMatches($this->document->documentElement); + // print '=====================' . PHP_EOL; + // var_dump($this->document); + // print '=====================' . PHP_EOL; + return !empty($selector) ? $this->find($selector) : $this; + } + + /** + * Given a CSS Selector, find matching items. + * + * @param string $selector + * CSS 3 Selector + * @return QueryPath + * @see filter() + * @see is() + * @todo If a find() returns zero matches, then a subsequent find() will + * also return zero matches, even if that find has a selector like :root. + * The reason for this is that the {@link QueryPathCssEventHandler} does + * not set the root of the document tree if it cannot find any elements + * from which to determine what the root is. The workaround is to use + * {@link top()} to select the root element again. + */ + public function find($selector) { + + // Optimize for ID/Class searches. These two take a long time + // when a rdp is used. Using an XPath pushes work to C code. + $ids = array(); + + $regex = '/^#([\w-]+)$|^\.([\w-]+)$/'; // $1 is ID, $2 is class. + //$regex = '/^#([\w-]+)$/'; + if (preg_match($regex, $selector, $ids) === 1) { + // If $1 is a match, we have an ID. + if (!empty($ids[1])) { + $xpath = new DOMXPath($this->document); + foreach ($this->matches as $item) { + + // For whatever reasons, the .// does not work correctly + // if the selected element is the root element. So we have + // an awful hack. + if ($item->isSameNode($this->document->documentElement) ) { + $xpathQuery = "//*[@id='{$ids[1]}']"; + } + // This is the correct XPath query. + else { + $xpathQuery = ".//*[@id='{$ids[1]}']"; + } + //$nl = $xpath->query("//*[@id='{$ids[1]}']", $item); + //$nl = $xpath->query(".//*[@id='{$ids[1]}']", $item); + $nl = $xpath->query($xpathQuery, $item); + if ($nl->length > 0) { + $this->setMatches($nl->item(0)); + break; + } + else { + // If no match is found, we set an empty. + $this->noMatches(); + } + } + } + // Quick search for class values. While the XPath can't do it + // all, it is faster than doing a recusive node search. + else { + $xpath = new DOMXPath($this->document); + $found = new SplObjectStorage(); + foreach ($this->matches as $item) { + // See comments on this in the #id code above. + if ($item->isSameNode($this->document->documentElement) ) { + $xpathQuery = "//*[@class]"; + } + // This is the correct XPath query. + else { + $xpathQuery = ".//*[@class]"; + } + $nl = $xpath->query($xpathQuery, $item); + for ($i = 0; $i < $nl->length; ++$i) { + $vals = explode(' ', $nl->item($i)->getAttribute('class')); + if (in_array($ids[2], $vals)) $found->attach($nl->item($i)); + } + } + $this->setMatches($found); + } + + return $this; + } + + + $query = new QueryPathCssEventHandler($this->matches); + $query->find($selector); + //$this->matches = $query->getMatches(); + $this->setMatches($query->getMatches()); + return $this; + } + + /** + * Execute an XPath query and store the results in the QueryPath. + * + * Most methods in this class support CSS 3 Selectors. Sometimes, though, + * XPath provides a finer-grained query language. Use this to execute + * XPath queries. + * + * Beware, though. QueryPath works best on DOM Elements, but an XPath + * query can return other nodes, strings, and values. These may not work with + * other QueryPath functions (though you will be able to access the + * values with {@link get()}). + * + * @param string $query + * An XPath query. + * @param array $options + * Currently supported options are: + * - 'namespace_prefix': And XML namespace prefix to be used as the default. Used + * in conjunction with 'namespace_uri' + * - 'namespace_uri': The URI to be used as the default namespace URI. Used + * with 'namespace_prefix' + * @return QueryPath + * A QueryPath object wrapping the results of the query. + * @see find() + * @author M Butcher + * @author Xavier Prud'homme + */ + public function xpath($query, $options = array()) { + $xpath = new DOMXPath($this->document); + + // Register a default namespace. + if (!empty($options['namespace_prefix']) && !empty($options['namespace_uri'])) { + $xpath->registerNamespace($options['namespace_prefix'], $options['namespace_uri']); + } + + $found = new SplObjectStorage(); + foreach ($this->matches as $item) { + $nl = $xpath->query($query, $item); + if ($nl->length > 0) { + for ($i = 0; $i < $nl->length; ++$i) $found->attach($nl->item($i)); + } + } + $this->setMatches($found); + return $this; + } + + /** + * Get the number of elements currently wrapped by this object. + * + * Note that there is no length property on this object. + * + * @return int + * Number of items in the object. + * @deprecated QueryPath now implements Countable, so use count(). + */ + public function size() { + return $this->matches->count(); + } + + /** + * Get the number of elements currently wrapped by this object. + * + * Since QueryPath is Countable, the PHP count() function can also + * be used on a QueryPath. + * + * @code + * + * @endcode + * + * @return int + * The number of matches in the QueryPath. + */ + public function count() { + return $this->matches->count(); + } + + /** + * Get one or all elements from this object. + * + * When called with no paramaters, this returns all objects wrapped by + * the QueryPath. Typically, these are DOMElement objects (unless you have + * used {@link map()}, {@link xpath()}, or other methods that can select + * non-elements). + * + * When called with an index, it will return the item in the QueryPath with + * that index number. + * + * Calling this method does not change the QueryPath (e.g. it is + * non-destructive). + * + * You can use qp()->get() to iterate over all elements matched. You can + * also iterate over qp() itself (QueryPath implementations must be Traversable). + * In the later case, though, each item + * will be wrapped in a QueryPath object. To learn more about iterating + * in QueryPath, see {@link examples/techniques.php}. + * + * @param int $index + * If specified, then only this index value will be returned. If this + * index is out of bounds, a NULL will be returned. + * @param boolean $asObject + * If this is TRUE, an {@link SplObjectStorage} object will be returned + * instead of an array. This is the preferred method for extensions to use. + * @return mixed + * If an index is passed, one element will be returned. If no index is + * present, an array of all matches will be returned. + * @see eq() + * @see SplObjectStorage + */ + public function get($index = NULL, $asObject = FALSE) { + if (isset($index)) { + return ($this->size() > $index) ? $this->getNthMatch($index) : NULL; + } + // Retain support for legacy. + if (!$asObject) { + $matches = array(); + foreach ($this->matches as $m) $matches[] = $m; + return $matches; + } + return $this->matches; + } + + /** + * Get the DOMDocument that we currently work with. + * + * This returns the current DOMDocument. Any changes made to this document will be + * accessible to QueryPath, as both will share access to the same object. + * + * @return DOMDocument + */ + public function document() { + return $this->document; + } + + /** + * On an XML document, load all XIncludes. + * + * @return QueryPath + */ + public function xinclude() { + $this->document->xinclude(); + return $this; + } + + /** + * Get all current elements wrapped in an array. + * Compatibility function for jQuery 1.4, but identical to calling {@link get()} + * with no parameters. + * + * @return array + * An array of DOMNodes (typically DOMElements). + */ + public function toArray() { + return $this->get(); + } + /** + * Get/set an attribute. + * - If no parameters are specified, this returns an associative array of all + * name/value pairs. + * - If both $name and $value are set, then this will set the attribute name/value + * pair for all items in this object. + * - If $name is set, and is an array, then + * all attributes in the array will be set for all items in this object. + * - If $name is a string and is set, then the attribute value will be returned. + * + * When an attribute value is retrieved, only the attribute value of the FIRST + * match is returned. + * + * @param mixed $name + * The name of the attribute or an associative array of name/value pairs. + * @param string $value + * A value (used only when setting an individual property). + * @return mixed + * If this was a setter request, return the QueryPath object. If this was + * an access request (getter), return the string value. + * @see removeAttr() + * @see tag() + * @see hasAttr() + * @see hasClass() + */ + public function attr($name = NULL, $value = NULL) { + + // Default case: Return all attributes as an assoc array. + if (is_null($name)) { + if ($this->matches->count() == 0) return NULL; + $ele = $this->getFirstMatch(); + $buffer = array(); + + // This does not appear to be part of the DOM + // spec. Nor is it documented. But it works. + foreach ($ele->attributes as $name => $attrNode) { + $buffer[$name] = $attrNode->value; + } + return $buffer; + } + + // multi-setter + if (is_array($name)) { + foreach ($name as $k => $v) { + foreach ($this->matches as $m) $m->setAttribute($k, $v); + } + return $this; + } + // setter + if (isset($value)) { + foreach ($this->matches as $m) $m->setAttribute($name, $value); + return $this; + } + + //getter + if ($this->matches->count() == 0) return NULL; + + // Special node type handler: + if ($name == 'nodeType') { + return $this->getFirstMatch()->nodeType; + } + + // Always return first match's attr. + return $this->getFirstMatch()->getAttribute($name); + } + /** + * Check to see if the given attribute is present. + * + * This returns TRUE if all selected items have the attribute, or + * FALSE if at least one item does not have the attribute. + * + * @param string $attrName + * The attribute name. + * @return boolean + * TRUE if all matches have the attribute, FALSE otherwise. + * @since 2.0 + * @see attr() + * @see hasClass() + */ + public function hasAttr($attrName) { + foreach ($this->matches as $match) { + if (!$match->hasAttribute($attrName)) return FALSE; + } + return TRUE; + } + + /** + * Set/get a CSS value for the current element(s). + * This sets the CSS value for each element in the QueryPath object. + * It does this by setting (or getting) the style attribute (without a namespace). + * + * For example, consider this code: + * @code + * css('background-color','red')->html(); + * ?> + * @endcode + * This will return the following HTML: + * @code + * + * @endcode + * + * If no parameters are passed into this function, then the current style + * element will be returned unparsed. Example: + * @code + * css('background-color','red')->css(); + * ?> + * @endcode + * This will return the following: + * @code + * background-color: red + * @endcode + * + * As of QueryPath 2.1, existing style attributes will be merged with new attributes. + * (In previous versions of QueryPath, a call to css() overwrite the existing style + * values). + * + * @param mixed $name + * If this is a string, it will be used as a CSS name. If it is an array, + * this will assume it is an array of name/value pairs of CSS rules. It will + * apply all rules to all elements in the set. + * @param string $value + * The value to set. This is only set if $name is a string. + * @return QueryPath + */ + public function css($name = NULL, $value = '') { + if (empty($name)) { + return $this->attr('style'); + } + + // Get any existing CSS. + $css = array(); + foreach ($this->matches as $match) { + $style = $match->getAttribute('style'); + if (!empty($style)) { + // XXX: Is this sufficient? + $style_array = explode(';', $style); + foreach ($style_array as $item) { + $item = trim($item); + + // Skip empty attributes. + if (strlen($item) == 0) continue; + + list($css_att, $css_val) = explode(':',$item, 2); + $css[$css_att] = trim($css_val); + } + } + } + + if (is_array($name)) { + // Use array_merge instead of + to preserve order. + $css = array_merge($css, $name); + } + else { + $css[$name] = $value; + } + + // Collapse CSS into a string. + $format = '%s: %s;'; + $css_string = ''; + foreach ($css as $n => $v) { + $css_string .= sprintf($format, $n, trim($v)); + } + + $this->attr('style', $css_string); + return $this; + } + + /** + * Insert or retrieve a Data URL. + * + * When called with just $attr, it will fetch the result, attempt to decode it, and + * return an array with the MIME type and the application data. + * + * When called with both $attr and $data, it will inject the data into all selected elements + * So @code$qp->dataURL('src', file_get_contents('my.png'), 'image/png')@endcode will inject + * the given PNG image into the selected elements. + * + * The current implementation only knows how to encode and decode Base 64 data. + * + * Note that this is known *not* to work on IE 6, but should render fine in other browsers. + * + * @param string $attr + * The name of the attribute. + * @param mixed $data + * The contents to inject as the data. The value can be any one of the following: + * - A URL: If this is given, then the subsystem will read the content from that URL. THIS + * MUST BE A FULL URL, not a relative path. + * - A string of data: If this is given, then the subsystem will encode the string. + * - A stream or file handle: If this is given, the stream's contents will be encoded + * and inserted as data. + * (Note that we make the assumption here that you would never want to set data to be + * a URL. If this is an incorrect assumption, file a bug.) + * @param string $mime + * The MIME type of the document. + * @param resource $context + * A valid context. Use this only if you need to pass a stream context. This is only necessary + * if $data is a URL. (See {@link stream_context_create()}). + * @return + * If this is called as a setter, this will return a QueryPath object. Otherwise, it + * will attempt to fetch data out of the attribute and return that. + * @see http://en.wikipedia.org/wiki/Data:_URL + * @see attr() + * @since 2.1 + */ + public function dataURL($attr, $data = NULL, $mime = 'application/octet-stream', $context = NULL) { + if (is_null($data)) { + // Attempt to fetch the data + $data = $this->attr($attr); + if (empty($data) || is_array($data) || strpos($data, 'data:') !== 0) { + return; + } + + // So 1 and 2 should be MIME types, and 3 should be the base64-encoded data. + $regex = '/^data:([a-zA-Z0-9]+)\/([a-zA-Z0-9]+);base64,(.*)$/'; + $matches = array(); + preg_match($regex, $data, $matches); + + if (!empty($matches)) { + $result = array( + 'mime' => $matches[1] . '/' . $matches[2], + 'data' => base64_decode($matches[3]), + ); + return $result; + } + } + else { + + $attVal = self::encodeDataURL($data, $mime, $context); + + return $this->attr($attr, $attVal); + + } + } + + + + /** + * Remove the named attribute from all elements in the current QueryPath. + * + * This will remove any attribute with the given name. It will do this on each + * item currently wrapped by QueryPath. + * + * As is the case in jQuery, this operation is not considered destructive. + * + * @param string $name + * Name of the parameter to remove. + * @return QueryPath + * The QueryPath object with the same elements. + * @see attr() + */ + public function removeAttr($name) { + foreach ($this->matches as $m) { + //if ($m->hasAttribute($name)) + $m->removeAttribute($name); + } + return $this; + } + /** + * Reduce the matched set to just one. + * + * This will take a matched set and reduce it to just one item -- the item + * at the index specified. This is a destructive operation, and can be undone + * with {@link end()}. + * + * @param $index + * The index of the element to keep. The rest will be + * discarded. + * @return QueryPath + * @see get() + * @see is() + * @see end() + */ + public function eq($index) { + // XXX: Might there be a more efficient way of doing this? + $this->setMatches($this->getNthMatch($index)); + return $this; + } + /** + * Given a selector, this checks to see if the current set has one or more matches. + * + * Unlike jQuery's version, this supports full selectors (not just simple ones). + * + * @param string $selector + * The selector to search for. As of QueryPath 2.1.1, this also supports passing a + * DOMNode object. + * @return boolean + * TRUE if one or more elements match. FALSE if no match is found. + * @see get() + * @see eq() + */ + public function is($selector) { + + if (is_object($selector)) { + if ($selector instanceof DOMNode) { + return count($this->matches) == 1 && $selector->isSameNode($this->get(0)); + } + elseif ($selector instanceof Traversable) { + if (count($selector) != count($this->matches)) { + return FALSE; + } + // Without $seen, there is an edge case here if $selector contains the same object + // more than once, but the counts are equal. For example, [a, a, a, a] will + // pass an is() on [a, b, c, d]. We use the $seen SPLOS to prevent this. + $seen = new SplObjectStorage(); + foreach ($selector as $item) { + if (!$this->matches->contains($item) || $seen->contains($item)) { + return FALSE; + } + $seen->attach($item); + } + return TRUE; + } + throw new Exception('Cannot compare an object to a QueryPath.'); + return FALSE; + } + + foreach ($this->matches as $m) { + $q = new QueryPathCssEventHandler($m); + if ($q->find($selector)->getMatches()->count()) { + return TRUE; + } + } + return FALSE; + } + /** + * Filter a list down to only elements that match the selector. + * Use this, for example, to find all elements with a class, or with + * certain children. + * + * @param string $selector + * The selector to use as a filter. + * @return QueryPath + * The QueryPath with non-matching items filtered out. + * @see filterLambda() + * @see filterCallback() + * @see map() + * @see find() + * @see is() + */ + public function filter($selector) { + $found = new SplObjectStorage(); + foreach ($this->matches as $m) if (qp($m, NULL, $this->options)->is($selector)) $found->attach($m); + $this->setMatches($found); + return $this; + } + /** + * Filter based on a lambda function. + * + * The function string will be executed as if it were the body of a + * function. It is passed two arguments: + * - $index: The index of the item. + * - $item: The current Element. + * If the function returns boolean FALSE, the item will be removed from + * the list of elements. Otherwise it will be kept. + * + * Example: + * @code + * qp('li')->filterLambda('qp($item)->attr("id") == "test"'); + * @endcode + * + * The above would filter down the list to only an item whose ID is + * 'text'. + * + * @param string $fn + * Inline lambda function in a string. + * @return QueryPath + * @see filter() + * @see map() + * @see mapLambda() + * @see filterCallback() + */ + public function filterLambda($fn) { + $function = create_function('$index, $item', $fn); + $found = new SplObjectStorage(); + $i = 0; + foreach ($this->matches as $item) + if ($function($i++, $item) !== FALSE) $found->attach($item); + + $this->setMatches($found); + return $this; + } + + /** + * Use regular expressions to filter based on the text content of matched elements. + * + * Only items that match the given regular expression will be kept. All others will + * be removed. + * + * The regular expression is run against the text content (the PCDATA) of the + * elements. This is a way of filtering elements based on their content. + * + * Example: + * @code + * + *

Hello World

+ * @endcode + * + * @code + * filterPreg('/World/')->size(); + * ?> + * @endcode + * + * The return value above will be 1 because the text content of @codeqp($xml, 'div')@endcode is + * @codeHello World@endcode. + * + * Compare this to the behavior of the :contains() CSS3 pseudo-class. + * + * @param string $regex + * A regular expression. + * @return QueryPath + * @see filter() + * @see filterCallback() + * @see preg_match() + */ + public function filterPreg($regex) { + + $found = new SplObjectStorage(); + + foreach ($this->matches as $item) { + if (preg_match($regex, $item->textContent) > 0) { + $found->attach($item); + } + } + $this->setMatches($found); + + return $this; + } + /** + * Filter based on a callback function. + * + * A callback may be any of the following: + * - a function: 'my_func'. + * - an object/method combo: $obj, 'myMethod' + * - a class/method combo: 'MyClass', 'myMethod' + * Note that classes are passed in strings. Objects are not. + * + * Each callback is passed to arguments: + * - $index: The index position of the object in the array. + * - $item: The item to be operated upon. + * + * If the callback function returns FALSE, the item will be removed from the + * set of matches. Otherwise the item will be considered a match and left alone. + * + * @param callback $callback. + * A callback either as a string (function) or an array (object, method OR + * classname, method). + * @return QueryPath + * Query path object augmented according to the function. + * @see filter() + * @see filterLambda() + * @see map() + * @see is() + * @see find() + */ + public function filterCallback($callback) { + $found = new SplObjectStorage(); + $i = 0; + if (is_callable($callback)) { + foreach($this->matches as $item) + if (call_user_func($callback, $i++, $item) !== FALSE) $found->attach($item); + } + else { + throw new QueryPathException('The specified callback is not callable.'); + } + $this->setMatches($found); + return $this; + } + /** + * Filter a list to contain only items that do NOT match. + * + * @param string $selector + * A selector to use as a negation filter. If the filter is matched, the + * element will be removed from the list. + * @return QueryPath + * The QueryPath object with matching items filtered out. + * @see find() + */ + public function not($selector) { + $found = new SplObjectStorage(); + if ($selector instanceof DOMElement) { + foreach ($this->matches as $m) if ($m !== $selector) $found->attach($m); + } + elseif (is_array($selector)) { + foreach ($this->matches as $m) { + if (!in_array($m, $selector, TRUE)) $found->attach($m); + } + } + elseif ($selector instanceof SplObjectStorage) { + foreach ($this->matches as $m) if ($selector->contains($m)) $found->attach($m); + } + else { + foreach ($this->matches as $m) if (!qp($m, NULL, $this->options)->is($selector)) $found->attach($m); + } + $this->setMatches($found); + return $this; + } + /** + * Get an item's index. + * + * Given a DOMElement, get the index from the matches. This is the + * converse of {@link get()}. + * + * @param DOMElement $subject + * The item to match. + * + * @return mixed + * The index as an integer (if found), or boolean FALSE. Since 0 is a + * valid index, you should use strong equality (===) to test.. + * @see get() + * @see is() + */ + public function index($subject) { + + $i = 0; + foreach ($this->matches as $m) { + if ($m === $subject) { + return $i; + } + ++$i; + } + return FALSE; + } + /** + * Run a function on each item in a set. + * + * The mapping callback can return anything. Whatever it returns will be + * stored as a match in the set, though. This means that afer a map call, + * there is no guarantee that the elements in the set will behave correctly + * with other QueryPath functions. + * + * Callback rules: + * - If the callback returns NULL, the item will be removed from the array. + * - If the callback returns an array, the entire array will be stored in + * the results. + * - If the callback returns anything else, it will be appended to the array + * of matches. + * + * @param callback $callback + * The function or callback to use. The callback will be passed two params: + * - $index: The index position in the list of items wrapped by this object. + * - $item: The current item. + * + * @return QueryPath + * The QueryPath object wrapping a list of whatever values were returned + * by each run of the callback. + * + * @see QueryPath::get() + * @see filter() + * @see find() + */ + public function map($callback) { + $found = new SplObjectStorage(); + + if (is_callable($callback)) { + $i = 0; + foreach ($this->matches as $item) { + $c = call_user_func($callback, $i, $item); + if (isset($c)) { + if (is_array($c) || $c instanceof Iterable) { + foreach ($c as $retval) { + if (!is_object($retval)) { + $tmp = new stdClass(); + $tmp->textContent = $retval; + $retval = $tmp; + } + $found->attach($retval); + } + } + else { + if (!is_object($c)) { + $tmp = new stdClass(); + $tmp->textContent = $c; + $c = $tmp; + } + $found->attach($c); + } + } + ++$i; + } + } + else { + throw new QueryPathException('Callback is not callable.'); + } + $this->setMatches($found, FALSE); + return $this; + } + /** + * Narrow the items in this object down to only a slice of the starting items. + * + * @param integer $start + * Where in the list of matches to begin the slice. + * @param integer $length + * The number of items to include in the slice. If nothing is specified, the + * all remaining matches (from $start onward) will be included in the sliced + * list. + * @return QueryPath + * @see array_slice() + */ + public function slice($start, $length = 0) { + $end = $length; + $found = new SplObjectStorage(); + if ($start >= $this->size()) { + $this->setMatches($found); + return $this; + } + + $i = $j = 0; + foreach ($this->matches as $m) { + if ($i >= $start) { + if ($end > 0 && $j >= $end) { + break; + } + $found->attach($m); + ++$j; + } + ++$i; + } + + $this->setMatches($found); + return $this; + } + /** + * Run a callback on each item in the list of items. + * + * Rules of the callback: + * - A callback is passed two variables: $index and $item. (There is no + * special treatment of $this, as there is in jQuery.) + * - You will want to pass $item by reference if it is not an + * object (DOMNodes are all objects). + * - A callback that returns FALSE will stop execution of the each() loop. This + * works like break in a standard loop. + * - A TRUE return value from the callback is analogous to a continue statement. + * - All other return values are ignored. + * + * @param callback $callback + * The callback to run. + * @return QueryPath + * The QueryPath. + * @see eachLambda() + * @see filter() + * @see map() + */ + public function each($callback) { + if (is_callable($callback)) { + $i = 0; + foreach ($this->matches as $item) { + if (call_user_func($callback, $i, $item) === FALSE) return $this; + ++$i; + } + } + else { + throw new QueryPathException('Callback is not callable.'); + } + return $this; + } + /** + * An each() iterator that takes a lambda function. + * + * @param string $lambda + * The lambda function. This will be passed ($index, &$item). + * @return QueryPath + * The QueryPath object. + * @see each() + * @see filterLambda() + * @see filterCallback() + * @see map() + */ + public function eachLambda($lambda) { + $index = 0; + foreach ($this->matches as $item) { + $fn = create_function('$index, &$item', $lambda); + if ($fn($index, $item) === FALSE) return $this; + ++$index; + } + return $this; + } + /** + * Insert the given markup as the last child. + * + * The markup will be inserted into each match in the set. + * + * The same element cannot be inserted multiple times into a document. DOM + * documents do not allow a single object to be inserted multiple times + * into the DOM. To insert the same XML repeatedly, we must first clone + * the object. This has one practical implication: Once you have inserted + * an element into the object, you cannot further manipulate the original + * element and expect the changes to be replciated in the appended object. + * (They are not the same -- there is no shared reference.) Instead, you + * will need to retrieve the appended object and operate on that. + * + * @param mixed $data + * This can be either a string (the usual case), or a DOM Element. + * @return QueryPath + * The QueryPath object. + * @see appendTo() + * @see prepend() + * @throws QueryPathException + * Thrown if $data is an unsupported object type. + */ + public function append($data) { + $data = $this->prepareInsert($data); + if (isset($data)) { + if (empty($this->document->documentElement) && $this->matches->count() == 0) { + // Then we assume we are writing to the doc root + $this->document->appendChild($data); + $found = new SplObjectStorage(); + $found->attach($this->document->documentElement); + $this->setMatches($found); + } + else { + // You can only append in item once. So in cases where we + // need to append multiple times, we have to clone the node. + foreach ($this->matches as $m) { + // DOMDocumentFragments are even more troublesome, as they don't + // always clone correctly. So we have to clone their children. + if ($data instanceof DOMDocumentFragment) { + foreach ($data->childNodes as $n) + $m->appendChild($n->cloneNode(TRUE)); + } + else { + // Otherwise a standard clone will do. + $m->appendChild($data->cloneNode(TRUE)); + } + + } + } + + } + return $this; + } + /** + * Append the current elements to the destination passed into the function. + * + * This cycles through all of the current matches and appends them to + * the context given in $destination. If a selector is provided then the + * $destination is queried (using that selector) prior to the data being + * appended. The data is then appended to the found items. + * + * @param QueryPath $dest + * A QueryPath object that will be appended to. + * @return QueryPath + * The original QueryPath, unaltered. Only the destination QueryPath will + * be modified. + * @see append() + * @see prependTo() + * @throws QueryPathException + * Thrown if $data is an unsupported object type. + */ + public function appendTo(QueryPath $dest) { + foreach ($this->matches as $m) $dest->append($m); + return $this; + } + /** + * Insert the given markup as the first child. + * + * The markup will be inserted into each match in the set. + * + * @param mixed $data + * This can be either a string (the usual case), or a DOM Element. + * @return QueryPath + * @see append() + * @see before() + * @see after() + * @see prependTo() + * @throws QueryPathException + * Thrown if $data is an unsupported object type. + */ + public function prepend($data) { + $data = $this->prepareInsert($data); + if (isset($data)) { + foreach ($this->matches as $m) { + $ins = $data->cloneNode(TRUE); + if ($m->hasChildNodes()) + $m->insertBefore($ins, $m->childNodes->item(0)); + else + $m->appendChild($ins); + } + } + return $this; + } + /** + * Take all nodes in the current object and prepend them to the children nodes of + * each matched node in the passed-in QueryPath object. + * + * This will iterate through each item in the current QueryPath object and + * add each item to the beginning of the children of each element in the + * passed-in QueryPath object. + * + * @see insertBefore() + * @see insertAfter() + * @see prepend() + * @see appendTo() + * @param QueryPath $dest + * The destination QueryPath object. + * @return QueryPath + * The original QueryPath, unmodified. NOT the destination QueryPath. + * @throws QueryPathException + * Thrown if $data is an unsupported object type. + */ + public function prependTo(QueryPath $dest) { + foreach ($this->matches as $m) $dest->prepend($m); + return $this; + } + + /** + * Insert the given data before each element in the current set of matches. + * + * This will take the give data (XML or HTML) and put it before each of the items that + * the QueryPath object currently contains. Contrast this with after(). + * + * @param mixed $data + * The data to be inserted. This can be XML in a string, a DomFragment, a DOMElement, + * or the other usual suspects. (See {@link qp()}). + * @return QueryPath + * Returns the QueryPath with the new modifications. The list of elements currently + * selected will remain the same. + * @see insertBefore() + * @see after() + * @see append() + * @see prepend() + * @throws QueryPathException + * Thrown if $data is an unsupported object type. + */ + public function before($data) { + $data = $this->prepareInsert($data); + foreach ($this->matches as $m) { + $ins = $data->cloneNode(TRUE); + $m->parentNode->insertBefore($ins, $m); + } + + return $this; + } + /** + * Insert the current elements into the destination document. + * The items are inserted before each element in the given QueryPath document. + * That is, they will be siblings with the current elements. + * + * @param QueryPath $dest + * Destination QueryPath document. + * @return QueryPath + * The current QueryPath object, unaltered. Only the destination QueryPath + * object is altered. + * @see before() + * @see insertAfter() + * @see appendTo() + * @throws QueryPathException + * Thrown if $data is an unsupported object type. + */ + public function insertBefore(QueryPath $dest) { + foreach ($this->matches as $m) $dest->before($m); + return $this; + } + /** + * Insert the contents of the current QueryPath after the nodes in the + * destination QueryPath object. + * + * @param QueryPath $dest + * Destination object where the current elements will be deposited. + * @return QueryPath + * The present QueryPath, unaltered. Only the destination object is altered. + * @see after() + * @see insertBefore() + * @see append() + * @throws QueryPathException + * Thrown if $data is an unsupported object type. + */ + public function insertAfter(QueryPath $dest) { + foreach ($this->matches as $m) $dest->after($m); + return $this; + } + /** + * Insert the given data after each element in the current QueryPath object. + * + * This inserts the element as a peer to the currently matched elements. + * Contrast this with {@link append()}, which inserts the data as children + * of matched elements. + * + * @param mixed $data + * The data to be appended. + * @return QueryPath + * The QueryPath object (with the items inserted). + * @see before() + * @see append() + * @throws QueryPathException + * Thrown if $data is an unsupported object type. + */ + public function after($data) { + $data = $this->prepareInsert($data); + foreach ($this->matches as $m) { + $ins = $data->cloneNode(TRUE); + if (isset($m->nextSibling)) + $m->parentNode->insertBefore($ins, $m->nextSibling); + else + $m->parentNode->appendChild($ins); + } + return $this; + } + /** + * Replace the existing element(s) in the list with a new one. + * + * @param mixed $new + * A DOMElement or XML in a string. This will replace all elements + * currently wrapped in the QueryPath object. + * @return QueryPath + * The QueryPath object wrapping the items that were removed. + * This remains consistent with the jQuery API. + * @see append() + * @see prepend() + * @see before() + * @see after() + * @see remove() + * @see replaceAll() + */ + public function replaceWith($new) { + $data = $this->prepareInsert($new); + $found = new SplObjectStorage(); + foreach ($this->matches as $m) { + $parent = $m->parentNode; + $parent->insertBefore($data->cloneNode(TRUE), $m); + $found->attach($parent->removeChild($m)); + } + $this->setMatches($found); + return $this; + } + /** + * Remove the parent element from the selected node or nodes. + * + * This takes the given list of nodes and "unwraps" them, moving them out of their parent + * node, and then deleting the parent node. + * + * For example, consider this: + * + * @code + * + * @endcode + * + * Now we can run this code: + * @code + * qp($xml, 'content')->unwrap(); + * @endcode + * + * This will result in: + * + * @code + * + * @endcode + * This is the opposite of {@link wrap()}. + * + * The root element cannot be unwrapped. It has no parents. + * If you attempt to use unwrap on a root element, this will throw a QueryPathException. + * (You can, however, "Unwrap" a child that is a direct descendant of the root element. This + * will remove the root element, and replace the child as the root element. Be careful, though. + * You cannot set more than one child as a root element.) + * + * @return QueryPath + * The QueryPath object, with the same element(s) selected. + * @throws QueryPathException + * An exception is thrown if one attempts to unwrap a root element. + * @see wrap() + * @since 2.1 + * @author mbutcher + */ + public function unwrap() { + + // We do this in two loops in order to + // capture the case where two matches are + // under the same parent. Othwerwise we might + // remove a match before we can move it. + $parents = new SplObjectStorage(); + foreach ($this->matches as $m) { + + // Cannot unwrap the root element. + if ($m->isSameNode($m->ownerDocument->documentElement)) { + throw new QueryPathException('Cannot unwrap the root element.'); + } + + // Move children to peer of parent. + $parent = $m->parentNode; + $old = $parent->removeChild($m); + $parent->parentNode->insertBefore($old, $parent); + $parents->attach($parent); + } + + // Now that all the children are moved, we + // remove all of the parents. + foreach ($parents as $ele) { + $ele->parentNode->removeChild($ele); + } + + return $this; + } + /** + * Wrap each element inside of the given markup. + * + * Markup is usually a string, but it can also be a DOMNode, a document + * fragment, a SimpleXMLElement, or another QueryPath object (in which case + * the first item in the list will be used.) + * + * @param mixed $markup + * Markup that will wrap each element in the current list. + * @return QueryPath + * The QueryPath object with the wrapping changes made. + * @see wrapAll() + * @see wrapInner() + */ + public function wrap($markup) { + $data = $this->prepareInsert($markup); + + // If the markup passed in is empty, we don't do any wrapping. + if (empty($data)) { + return $this; + } + + foreach ($this->matches as $m) { + $copy = $data->firstChild->cloneNode(TRUE); + + // XXX: Should be able to avoid doing this over and over. + if ($copy->hasChildNodes()) { + $deepest = $this->deepestNode($copy); + // FIXME: Does this need a different data structure? + $bottom = $deepest[0]; + } + else + $bottom = $copy; + + $parent = $m->parentNode; + $parent->insertBefore($copy, $m); + $m = $parent->removeChild($m); + $bottom->appendChild($m); + //$parent->appendChild($copy); + } + return $this; + } + /** + * Wrap all elements inside of the given markup. + * + * So all elements will be grouped together under this single marked up + * item. This works by first determining the parent element of the first item + * in the list. It then moves all of the matching elements under the wrapper + * and inserts the wrapper where that first element was found. (This is in + * accordance with the way jQuery works.) + * + * Markup is usually XML in a string, but it can also be a DOMNode, a document + * fragment, a SimpleXMLElement, or another QueryPath object (in which case + * the first item in the list will be used.) + * + * @param string $markup + * Markup that will wrap all elements in the current list. + * @return QueryPath + * The QueryPath object with the wrapping changes made. + * @see wrap() + * @see wrapInner() + */ + public function wrapAll($markup) { + if ($this->matches->count() == 0) return; + + $data = $this->prepareInsert($markup); + + if (empty($data)) { + return $this; + } + + if ($data->hasChildNodes()) { + $deepest = $this->deepestNode($data); + // FIXME: Does this need fixing? + $bottom = $deepest[0]; + } + else + $bottom = $data; + + $first = $this->getFirstMatch(); + $parent = $first->parentNode; + $parent->insertBefore($data, $first); + foreach ($this->matches as $m) { + $bottom->appendChild($m->parentNode->removeChild($m)); + } + return $this; + } + /** + * Wrap the child elements of each item in the list with the given markup. + * + * Markup is usually a string, but it can also be a DOMNode, a document + * fragment, a SimpleXMLElement, or another QueryPath object (in which case + * the first item in the list will be used.) + * + * @param string $markup + * Markup that will wrap children of each element in the current list. + * @return QueryPath + * The QueryPath object with the wrapping changes made. + * @see wrap() + * @see wrapAll() + */ + public function wrapInner($markup) { + $data = $this->prepareInsert($markup); + + // No data? Short circuit. + if (empty($data)) return $this; + + if ($data->hasChildNodes()) { + $deepest = $this->deepestNode($data); + // FIXME: ??? + $bottom = $deepest[0]; + } + else + $bottom = $data; + + foreach ($this->matches as $m) { + if ($m->hasChildNodes()) { + while($m->firstChild) { + $kid = $m->removeChild($m->firstChild); + $bottom->appendChild($kid); + } + } + $m->appendChild($data); + } + return $this; + } + /** + * Reduce the set of matches to the deepest child node in the tree. + * + * This loops through the matches and looks for the deepest child node of all of + * the matches. "Deepest", here, is relative to the nodes in the list. It is + * calculated as the distance from the starting node to the most distant child + * node. In other words, it is not necessarily the farthest node from the root + * element, but the farthest note from the matched element. + * + * In the case where there are multiple nodes at the same depth, all of the + * nodes at that depth will be included. + * + * @return QueryPath + * The QueryPath wrapping the single deepest node. + */ + public function deepest() { + $deepest = 0; + $winner = new SplObjectStorage(); + foreach ($this->matches as $m) { + $local_deepest = 0; + $local_ele = $this->deepestNode($m, 0, NULL, $local_deepest); + + // Replace with the new deepest. + if ($local_deepest > $deepest) { + $winner = new SplObjectStorage(); + foreach ($local_ele as $lele) $winner->attach($lele); + $deepest = $local_deepest; + } + // Augument with other equally deep elements. + elseif ($local_deepest == $deepest) { + foreach ($local_ele as $lele) + $winner->attach($lele); + } + } + $this->setMatches($winner); + return $this; + } + + /** + * A depth-checking function. Typically, it only needs to be + * invoked with the first parameter. The rest are used for recursion. + * @see deepest(); + * @param DOMNode $ele + * The element. + * @param int $depth + * The depth guage + * @param mixed $current + * The current set. + * @param DOMNode $deepest + * A reference to the current deepest node. + * @return array + * Returns an array of DOM nodes. + */ + protected function deepestNode(DOMNode $ele, $depth = 0, $current = NULL, &$deepest = NULL) { + // FIXME: Should this use SplObjectStorage? + if (!isset($current)) $current = array($ele); + if (!isset($deepest)) $deepest = $depth; + if ($ele->hasChildNodes()) { + foreach ($ele->childNodes as $child) { + if ($child->nodeType === XML_ELEMENT_NODE) { + $current = $this->deepestNode($child, $depth + 1, $current, $deepest); + } + } + } + elseif ($depth > $deepest) { + $current = array($ele); + $deepest = $depth; + } + elseif ($depth === $deepest) { + $current[] = $ele; + } + return $current; + } + + /** + * Prepare an item for insertion into a DOM. + * + * This handles a variety of boilerplate tasks that need doing before an + * indeterminate object can be inserted into a DOM tree. + * - If item is a string, this is converted into a document fragment and returned. + * - If item is a QueryPath, then the first item is retrieved and this call function + * is called recursivel. + * - If the item is a DOMNode, it is imported into the current DOM if necessary. + * - If the item is a SimpleXMLElement, it is converted into a DOM node and then + * imported. + * + * @param mixed $item + * Item to prepare for insert. + * @return mixed + * Returns the prepared item. + * @throws QueryPathException + * Thrown if the object passed in is not of a supprted object type. + */ + protected function prepareInsert($item) { + if(empty($item)) { + return; + } + elseif (is_string($item)) { + // If configured to do so, replace all entities. + if ($this->options['replace_entities']) { + $item = QueryPathEntities::replaceAllEntities($item); + } + + $frag = $this->document->createDocumentFragment(); + try { + set_error_handler(array('QueryPathParseException', 'initializeFromError'), $this->errTypes); + $frag->appendXML($item); + } + // Simulate a finally block. + catch (Exception $e) { + restore_error_handler(); + throw $e; + } + restore_error_handler(); + return $frag; + } + elseif ($item instanceof QueryPath) { + if ($item->size() == 0) + return; + + return $this->prepareInsert($item->get(0)); + } + elseif ($item instanceof DOMNode) { + if ($item->ownerDocument !== $this->document) { + // Deep clone this and attach it to this document + $item = $this->document->importNode($item, TRUE); + } + return $item; + } + elseif ($item instanceof SimpleXMLElement) { + $element = dom_import_simplexml($item); + return $this->document->importNode($element, TRUE); + } + // What should we do here? + //var_dump($item); + throw new QueryPathException("Cannot prepare item of unsupported type: " . gettype($item)); + } + /** + * The tag name of the first element in the list. + * + * This returns the tag name of the first element in the list of matches. If + * the list is empty, an empty string will be used. + * + * @see replaceAll() + * @see replaceWith() + * @return string + * The tag name of the first element in the list. + */ + public function tag() { + return ($this->size() > 0) ? $this->getFirstMatch()->tagName : ''; + } + /** + * Remove any items from the list if they match the selector. + * + * In other words, each item that matches the selector will be remove + * from the DOM document. The returned QueryPath wraps the list of + * removed elements. + * + * If no selector is specified, this will remove all current matches from + * the document. + * + * @param string $selector + * A CSS Selector. + * @return QueryPath + * The Query path wrapping a list of removed items. + * @see replaceAll() + * @see replaceWith() + * @see removeChildren() + */ + public function remove($selector = NULL) { + if(!empty($selector)) { + // Do a non-destructive find. + $query = new QueryPathCssEventHandler($this->matches); + $query->find($selector); + $matches = $query->getMatches(); + } + else { + $matches = $this->matches; + } + + $found = new SplObjectStorage(); + foreach ($matches as $item) { + // The item returned is (according to docs) different from + // the one passed in, so we have to re-store it. + $found->attach($item->parentNode->removeChild($item)); + } + + // Return a clone QueryPath with just the removed items. If + // no items are found, this will return an empty QueryPath. + return count($found) == 0 ? new QueryPath() : new QueryPath($found); + } + /** + * This replaces everything that matches the selector with the first value + * in the current list. + * + * This is the reverse of replaceWith. + * + * Unlike jQuery, QueryPath cannot assume a default document. Consequently, + * you must specify the intended destination document. If it is omitted, the + * present document is assumed to be tthe document. However, that can result + * in undefined behavior if the selector and the replacement are not sufficiently + * distinct. + * + * @param string $selector + * The selector. + * @param DOMDocument $document + * The destination document. + * @return QueryPath + * The QueryPath wrapping the modified document. + * @deprecated Due to the fact that this is not a particularly friendly method, + * and that it can be easily replicated using {@see replaceWith()}, it is to be + * considered deprecated. + * @see remove() + * @see replaceWith() + */ + public function replaceAll($selector, DOMDocument $document) { + $replacement = $this->size() > 0 ? $this->getFirstMatch() : $this->document->createTextNode(''); + + $c = new QueryPathCssEventHandler($document); + $c->find($selector); + $temp = $c->getMatches(); + foreach ($temp as $item) { + $node = $replacement->cloneNode(); + $node = $document->importNode($node); + $item->parentNode->replaceChild($node, $item); + } + return qp($document, NULL, $this->options); + } + /** + * Add more elements to the current set of matches. + * + * This begins the new query at the top of the DOM again. The results found + * when running this selector are then merged into the existing results. In + * this way, you can add additional elements to the existing set. + * + * @param string $selector + * A valid selector. + * @return QueryPath + * The QueryPath object with the newly added elements. + * @see append() + * @see after() + * @see andSelf() + * @see end() + */ + public function add($selector) { + + // This is destructive, so we need to set $last: + $this->last = $this->matches; + + foreach (qp($this->document, $selector, $this->options)->get() as $item) + $this->matches->attach($item); + return $this; + } + /** + * Revert to the previous set of matches. + * + * This will revert back to the last set of matches (before the last + * "destructive" set of operations). This undoes any change made to the set of + * matched objects. Functions like find() and filter() change the + * list of matched objects. The end() function will revert back to the last set of + * matched items. + * + * Note that functions that modify the document, but do not change the list of + * matched objects, are not "destructive". Thus, calling append('something')->end() + * will not undo the append() call. + * + * Only one level of changes is stored. Reverting beyond that will result in + * an empty set of matches. Example: + * + * @code + * // The line below returns the same thing as qp(document, 'p'); + * qp(document, 'p')->find('div')->end(); + * // This returns an empty array: + * qp(document, 'p')->end(); + * // This returns an empty array: + * qp(document, 'p')->find('div')->find('span')->end()->end(); + * @endcode + * + * The last one returns an empty array because only one level of changes is stored. + * + * @return QueryPath + * A QueryPath object reflecting the list of matches prior to the last destructive + * operation. + * @see andSelf() + * @see add() + */ + public function end() { + // Note that this does not use setMatches because it must set the previous + // set of matches to empty array. + $this->matches = $this->last; + $this->last = new SplObjectStorage(); + return $this; + } + /** + * Combine the current and previous set of matched objects. + * + * Example: + * + * @code + * qp(document, 'p')->find('div')->andSelf(); + * @endcode + * + * The code above will contain a list of all p elements and all div elements that + * are beneath p elements. + * + * @see end(); + * @return QueryPath + * A QueryPath object with the results of the last two "destructive" operations. + * @see add() + * @see end() + */ + public function andSelf() { + // This is destructive, so we need to set $last: + $last = $this->matches; + + foreach ($this->last as $item) $this->matches->attach($item); + + $this->last = $last; + return $this; + } + /** + * Remove all child nodes. + * + * This is equivalent to jQuery's empty() function. (However, empty() is a + * PHP built-in, and cannot be used as a method name.) + * + * @return QueryPath + * The QueryPath object with the child nodes removed. + * @see replaceWith() + * @see replaceAll() + * @see remove() + */ + public function removeChildren() { + foreach ($this->matches as $m) { + while($kid = $m->firstChild) { + $m->removeChild($kid); + } + } + return $this; + } + /** + * Get the children of the elements in the QueryPath object. + * + * If a selector is provided, the list of children will be filtered through + * the selector. + * + * @param string $selector + * A valid selector. + * @return QueryPath + * A QueryPath wrapping all of the children. + * @see removeChildren() + * @see parent() + * @see parents() + * @see next() + * @see prev() + */ + public function children($selector = NULL) { + $found = new SplObjectStorage(); + foreach ($this->matches as $m) { + foreach($m->childNodes as $c) { + if ($c->nodeType == XML_ELEMENT_NODE) $found->attach($c); + } + } + if (empty($selector)) { + $this->setMatches($found); + } + else { + $this->matches = $found; // Don't buffer this. It is temporary. + $this->filter($selector); + } + return $this; + } + /** + * Get all child nodes (not just elements) of all items in the matched set. + * + * It gets only the immediate children, not all nodes in the subtree. + * + * This does not process iframes. Xinclude processing is dependent on the + * DOM implementation and configuration. + * + * @return QueryPath + * A QueryPath object wrapping all child nodes for all elements in the + * QueryPath object. + * @see find() + * @see text() + * @see html() + * @see innerHTML() + * @see xml() + * @see innerXML() + */ + public function contents() { + $found = new SplObjectStorage(); + foreach ($this->matches as $m) { + if (empty($m->childNodes)) continue; // Issue #51 + foreach ($m->childNodes as $c) { + $found->attach($c); + } + } + $this->setMatches($found); + return $this; + } + /** + * Get a list of siblings for elements currently wrapped by this object. + * + * This will compile a list of every sibling of every element in the + * current list of elements. + * + * Note that if two siblings are present in the QueryPath object to begin with, + * then both will be returned in the matched set, since they are siblings of each + * other. In other words,if the matches contain a and b, and a and b are siblings of + * each other, than running siblings will return a set that contains + * both a and b. + * + * @param string $selector + * If the optional selector is provided, siblings will be filtered through + * this expression. + * @return QueryPath + * The QueryPath containing the matched siblings. + * @see contents() + * @see children() + * @see parent() + * @see parents() + */ + public function siblings($selector = NULL) { + $found = new SplObjectStorage(); + foreach ($this->matches as $m) { + $parent = $m->parentNode; + foreach ($parent->childNodes as $n) { + if ($n->nodeType == XML_ELEMENT_NODE && $n !== $m) { + $found->attach($n); + } + } + } + if (empty($selector)) { + $this->setMatches($found); + } + else { + $this->matches = $found; // Don't buffer this. It is temporary. + $this->filter($selector); + } + return $this; + } + /** + * Find the closest element matching the selector. + * + * This finds the closest match in the ancestry chain. It first checks the + * present element. If the present element does not match, this traverses up + * the ancestry chain (e.g. checks each parent) looking for an item that matches. + * + * It is provided for jQuery 1.3 compatibility. + * @param string $selector + * A CSS Selector to match. + * @return QueryPath + * The set of matches. + * @since 2.0 + */ + public function closest($selector) { + $found = new SplObjectStorage(); + foreach ($this->matches as $m) { + + if (qp($m, NULL, $this->options)->is($selector) > 0) { + $found->attach($m); + } + else { + while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) { + $m = $m->parentNode; + // Is there any case where parent node is not an element? + if ($m->nodeType === XML_ELEMENT_NODE && qp($m, NULL, $this->options)->is($selector) > 0) { + $found->attach($m); + break; + } + } + } + + } + $this->setMatches($found); + return $this; + } + /** + * Get the immediate parent of each element in the QueryPath. + * + * If a selector is passed, this will return the nearest matching parent for + * each element in the QueryPath. + * + * @param string $selector + * A valid CSS3 selector. + * @return QueryPath + * A QueryPath object wrapping the matching parents. + * @see children() + * @see siblings() + * @see parents() + */ + public function parent($selector = NULL) { + $found = new SplObjectStorage(); + foreach ($this->matches as $m) { + while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) { + $m = $m->parentNode; + // Is there any case where parent node is not an element? + if ($m->nodeType === XML_ELEMENT_NODE) { + if (!empty($selector)) { + if (qp($m, NULL, $this->options)->is($selector) > 0) { + $found->attach($m); + break; + } + } + else { + $found->attach($m); + break; + } + } + } + } + $this->setMatches($found); + return $this; + } + /** + * Get all ancestors of each element in the QueryPath. + * + * If a selector is present, only matching ancestors will be retrieved. + * + * @see parent() + * @param string $selector + * A valid CSS 3 Selector. + * @return QueryPath + * A QueryPath object containing the matching ancestors. + * @see siblings() + * @see children() + */ + public function parents($selector = NULL) { + $found = new SplObjectStorage(); + foreach ($this->matches as $m) { + while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) { + $m = $m->parentNode; + // Is there any case where parent node is not an element? + if ($m->nodeType === XML_ELEMENT_NODE) { + if (!empty($selector)) { + if (qp($m, NULL, $this->options)->is($selector) > 0) + $found->attach($m); + } + else + $found->attach($m); + } + } + } + $this->setMatches($found); + return $this; + } + /** + * Set or get the markup for an element. + * + * If $markup is set, then the giving markup will be injected into each + * item in the set. All other children of that node will be deleted, and this + * new code will be the only child or children. The markup MUST BE WELL FORMED. + * + * If no markup is given, this will return a string representing the child + * markup of the first node. + * + * Important: This differs from jQuery's html() function. This function + * returns the current node and all of its children. jQuery returns only + * the children. This means you do not need to do things like this: + * @code$qp->parent()->html()@endcode. + * + * By default, this is HTML 4.01, not XHTML. Use {@link xml()} for XHTML. + * + * @param string $markup + * The text to insert. + * @return mixed + * A string if no markup was passed, or a QueryPath if markup was passed. + * @see xml() + * @see text() + * @see contents() + */ + public function html($markup = NULL) { + if (isset($markup)) { + + if ($this->options['replace_entities']) { + $markup = QueryPathEntities::replaceAllEntities($markup); + } + + // Parse the HTML and insert it into the DOM + //$doc = DOMDocument::loadHTML($markup); + $doc = $this->document->createDocumentFragment(); + $doc->appendXML($markup); + $this->removeChildren(); + $this->append($doc); + return $this; + } + $length = $this->size(); + if ($length == 0) { + return NULL; + } + // Only return the first item -- that's what JQ does. + $first = $this->getFirstMatch(); + + // Catch cases where first item is not a legit DOM object. + if (!($first instanceof DOMNode)) { + return NULL; + } + + // Added by eabrand. + if(!$first->ownerDocument->documentElement) { + return NULL; + } + + if ($first instanceof DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) { + return $this->document->saveHTML(); + } + // saveHTML cannot take a node and serialize it. + return $this->document->saveXML($first); + } + + /** + * Fetch the HTML contents INSIDE of the first QueryPath item. + * + * This behaves the way jQuery's @codehtml()@endcode function behaves. + * + * This gets all children of the first match in QueryPath. + * + * Consider this fragment: + * @code + *

+ * test

foo

test + *

+ * @endcode + * + * We can retrieve just the contents of this code by doing something like + * this: + * @code + * qp($xml, 'div')->innerHTML(); + * @endcode + * + * This would return the following: + * @codetest

foo

test@endcode + * + * @return string + * Returns a string representation of the child nodes of the first + * matched element. + * @see html() + * @see innerXML() + * @see innerXHTML() + * @since 2.0 + */ + public function innerHTML() { + return $this->innerXML(); + } + + /** + * Fetch child (inner) nodes of the first match. + * + * This will return the children of the present match. For an example, + * see {@link innerHTML()}. + * + * @see innerHTML() + * @see innerXML() + * @return string + * Returns a string of XHTML that represents the children of the present + * node. + * @since 2.0 + */ + public function innerXHTML() { + $length = $this->size(); + if ($length == 0) { + return NULL; + } + // Only return the first item -- that's what JQ does. + $first = $this->getFirstMatch(); + + // Catch cases where first item is not a legit DOM object. + if (!($first instanceof DOMNode)) { + return NULL; + } + elseif (!$first->hasChildNodes()) { + return ''; + } + + $buffer = ''; + foreach ($first->childNodes as $child) { + $buffer .= $this->document->saveXML($child, LIBXML_NOEMPTYTAG); + } + + return $buffer; + } + + /** + * Fetch child (inner) nodes of the first match. + * + * This will return the children of the present match. For an example, + * see {@link innerHTML()}. + * + * @see innerHTML() + * @see innerXHTML() + * @return string + * Returns a string of XHTML that represents the children of the present + * node. + * @since 2.0 + */ + public function innerXML() { + $length = $this->size(); + if ($length == 0) { + return NULL; + } + // Only return the first item -- that's what JQ does. + $first = $this->getFirstMatch(); + + // Catch cases where first item is not a legit DOM object. + if (!($first instanceof DOMNode)) { + return NULL; + } + elseif (!$first->hasChildNodes()) { + return ''; + } + + $buffer = ''; + foreach ($first->childNodes as $child) { + $buffer .= $this->document->saveXML($child); + } + + return $buffer; + } + + /** + * Retrieve the text of each match and concatenate them with the given separator. + * + * This has the effect of looping through all children, retrieving their text + * content, and then concatenating the text with a separator. + * + * @param string $sep + * The string used to separate text items. The default is a comma followed by a + * space. + * @param boolean $filterEmpties + * If this is true, empty items will be ignored. + * @return string + * The text contents, concatenated together with the given separator between + * every pair of items. + * @see implode() + * @see text() + * @since 2.0 + */ + public function textImplode($sep = ', ', $filterEmpties = TRUE) { + $tmp = array(); + foreach ($this->matches as $m) { + $txt = $m->textContent; + $trimmed = trim($txt); + // If filter empties out, then we only add items that have content. + if ($filterEmpties) { + if (strlen($trimmed) > 0) $tmp[] = $txt; + } + // Else add all content, even if it's empty. + else { + $tmp[] = $txt; + } + } + return implode($sep, $tmp); + } + /** + * Get the text contents from just child elements. + * + * This is a specialized variant of textImplode() that implodes text for just the + * child elements of the current element. + * + * @param string $separator + * The separator that will be inserted between found text content. + * @return string + * The concatenated values of all children. + */ + function childrenText($separator = ' ') { + // Branch makes it non-destructive. + return $this->branch()->xpath('descendant::text()')->textImplode($separator); + } + /** + * Get or set the text contents of a node. + * @param string $text + * If this is not NULL, this value will be set as the text of the node. It + * will replace any existing content. + * @return mixed + * A QueryPath if $text is set, or the text content if no text + * is passed in as a pram. + * @see html() + * @see xml() + * @see contents() + */ + public function text($text = NULL) { + if (isset($text)) { + $this->removeChildren(); + $textNode = $this->document->createTextNode($text); + foreach ($this->matches as $m) $m->appendChild($textNode); + return $this; + } + // Returns all text as one string: + $buf = ''; + foreach ($this->matches as $m) $buf .= $m->textContent; + return $buf; + } + /** + * Get or set the text before each selected item. + * + * If $text is passed in, the text is inserted before each currently selected item. + * + * If no text is given, this will return the concatenated text after each selected element. + * + * @code + * FooBar'; + * + * // This will return 'Foo' + * qp($xml, 'a')->textBefore(); + * + * // This will insert 'Baz' right before . + * qp($xml, 'b')->textBefore('Baz'); + * ?> + * @endcode + * + * @param string $text + * If this is set, it will be inserted before each node in the current set of + * selected items. + * @return mixed + * Returns the QueryPath object if $text was set, and returns a string (possibly empty) + * if no param is passed. + */ + public function textBefore($text = NULL) { + if (isset($text)) { + $textNode = $this->document->createTextNode($text); + return $this->before($textNode); + } + $buffer = ''; + foreach ($this->matches as $m) { + $p = $m; + while (isset($p->previousSibling) && $p->previousSibling->nodeType == XML_TEXT_NODE) { + $p = $p->previousSibling; + $buffer .= $p->textContent; + } + } + return $buffer; + } + + public function textAfter($text = NULL) { + if (isset($text)) { + $textNode = $this->document->createTextNode($text); + return $this->after($textNode); + } + $buffer = ''; + foreach ($this->matches as $m) { + $n = $m; + while (isset($n->nextSibling) && $n->nextSibling->nodeType == XML_TEXT_NODE) { + $n = $n->nextSibling; + $buffer .= $n->textContent; + } + } + return $buffer; + } + + /** + * Set or get the value of an element's 'value' attribute. + * + * The 'value' attribute is common in HTML form elements. This is a + * convenience function for accessing the values. Since this is not common + * task on the server side, this method may be removed in future releases. (It + * is currently provided for jQuery compatibility.) + * + * If a value is provided in the params, then the value will be set for all + * matches. If no params are given, then the value of the first matched element + * will be returned. This may be NULL. + * + * @deprecated Just use attr(). There's no reason to use this on the server. + * @see attr() + * @param string $value + * @return mixed + * Returns a QueryPath if a string was passed in, and a string if no string + * was passed in. In the later case, an error will produce NULL. + */ + public function val($value = NULL) { + if (isset($value)) { + $this->attr('value', $value); + return $this; + } + return $this->attr('value'); + } + /** + * Set or get XHTML markup for an element or elements. + * + * This differs from {@link html()} in that it processes (and produces) + * strictly XML 1.0 compliant markup. + * + * Like {@link xml()} and {@link html()}, this functions as both a + * setter and a getter. + * + * This is a convenience function for fetching HTML in XML format. + * It does no processing of the markup (such as schema validation). + * @param string $markup + * A string containing XML data. + * @return mixed + * If markup is passed in, a QueryPath is returned. If no markup is passed + * in, XML representing the first matched element is returned. + * @see html() + * @see innerXHTML() + */ + public function xhtml($markup = NULL) { + + // XXX: This is a minor reworking of the original xml() method. + // This should be refactored, probably. + // See http://github.com/technosophos/querypath/issues#issue/10 + + $omit_xml_decl = $this->options['omit_xml_declaration']; + if ($markup === TRUE) { + // Basically, we handle the special case where we don't + // want the XML declaration to be displayed. + $omit_xml_decl = TRUE; + } + elseif (isset($markup)) { + return $this->xml($markup); + } + + $length = $this->size(); + if ($length == 0) { + return NULL; + } + + // Only return the first item -- that's what JQ does. + $first = $this->getFirstMatch(); + // Catch cases where first item is not a legit DOM object. + if (!($first instanceof DOMNode)) { + return NULL; + } + + if ($first instanceof DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) { + + // Has the unfortunate side-effect of stripping doctype. + //$text = ($omit_xml_decl ? $this->document->saveXML($first->ownerDocument->documentElement, LIBXML_NOEMPTYTAG) : $this->document->saveXML(NULL, LIBXML_NOEMPTYTAG)); + $text = $this->document->saveXML(NULL, LIBXML_NOEMPTYTAG); + } + else { + $text = $this->document->saveXML($first, LIBXML_NOEMPTYTAG); + } + + // Issue #47: Using the old trick for removing the XML tag also removed the + // doctype. So we remove it with a regex: + if ($omit_xml_decl) { + $text = preg_replace('/<\?xml\s[^>]*\?>/', '', $text); + } + + // This is slightly lenient: It allows for cases where code incorrectly places content + // inside of these supposedly unary elements. + $unary = '/<(area|base|basefont|br|col|frame|hr|img|input|isindex|link|meta|param)(?(?=\s)([^>\/]+))><\/[^>]*>/i'; + $text = preg_replace($unary, '<\\1\\2 />', $text); + + // Experimental: Support for enclosing CDATA sections with comments to be both XML compat + // and HTML 4/5 compat + $cdata = '/()/i'; + $replace = $this->options['escape_xhtml_js_css_sections']; + $text = preg_replace($cdata, $replace, $text); + + return $text; + } + /** + * Set or get the XML markup for an element or elements. + * + * Like {@link html()}, this functions in both a setter and a getter mode. + * + * In setter mode, the string passed in will be parsed and then appended to the + * elements wrapped by this QueryPath object.When in setter mode, this parses + * the XML using the DOMFragment parser. For that reason, an XML declaration + * is not necessary. + * + * In getter mode, the first element wrapped by this QueryPath object will be + * converted to an XML string and returned. + * + * @param string $markup + * A string containing XML data. + * @return mixed + * If markup is passed in, a QueryPath is returned. If no markup is passed + * in, XML representing the first matched element is returned. + * @see xhtml() + * @see html() + * @see text() + * @see content() + * @see innerXML() + */ + public function xml($markup = NULL) { + $omit_xml_decl = $this->options['omit_xml_declaration']; + if ($markup === TRUE) { + // Basically, we handle the special case where we don't + // want the XML declaration to be displayed. + $omit_xml_decl = TRUE; + } + elseif (isset($markup)) { + if ($this->options['replace_entities']) { + $markup = QueryPathEntities::replaceAllEntities($markup); + } + $doc = $this->document->createDocumentFragment(); + $doc->appendXML($markup); + $this->removeChildren(); + $this->append($doc); + return $this; + } + $length = $this->size(); + if ($length == 0) { + return NULL; + } + // Only return the first item -- that's what JQ does. + $first = $this->getFirstMatch(); + + // Catch cases where first item is not a legit DOM object. + if (!($first instanceof DOMNode)) { + return NULL; + } + + if ($first instanceof DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) { + + return ($omit_xml_decl ? $this->document->saveXML($first->ownerDocument->documentElement) : $this->document->saveXML()); + } + return $this->document->saveXML($first); + } + /** + * Send the XML document to the client. + * + * Write the document to a file path, if given, or + * to stdout (usually the client). + * + * This prints the entire document. + * + * @param string $path + * The path to the file into which the XML should be written. if + * this is NULL, data will be written to STDOUT, which is usually + * sent to the remote browser. + * @param int $options + * (As of QueryPath 2.1) Pass libxml options to the saving mechanism. + * @return QueryPath + * The QueryPath object, unmodified. + * @see xml() + * @see innerXML() + * @see writeXHTML() + * @throws Exception + * In the event that a file cannot be written, an Exception will be thrown. + */ + public function writeXML($path = NULL, $options = NULL) { + if ($path == NULL) { + print $this->document->saveXML(NULL, $options); + } + else { + try { + set_error_handler(array('QueryPathIOException', 'initializeFromError')); + $this->document->save($path, $options); + } + catch (Exception $e) { + restore_error_handler(); + throw $e; + } + restore_error_handler(); + } + return $this; + } + /** + * Writes HTML to output. + * + * HTML is formatted as HTML 4.01, without strict XML unary tags. This is for + * legacy HTML content. Modern XHTML should be written using {@link toXHTML()}. + * + * Write the document to stdout (usually the client) or to a file. + * + * @param string $path + * The path to the file into which the XML should be written. if + * this is NULL, data will be written to STDOUT, which is usually + * sent to the remote browser. + * @return QueryPath + * The QueryPath object, unmodified. + * @see html() + * @see innerHTML() + * @throws Exception + * In the event that a file cannot be written, an Exception will be thrown. + */ + public function writeHTML($path = NULL) { + if ($path == NULL) { + print $this->document->saveHTML(); + } + else { + try { + set_error_handler(array('QueryPathParseException', 'initializeFromError')); + $this->document->saveHTMLFile($path); + } + catch (Exception $e) { + restore_error_handler(); + throw $e; + } + restore_error_handler(); + } + return $this; + } + + /** + * Write an XHTML file to output. + * + * Typically, you should use this instead of {@link writeHTML()}. + * + * Currently, this functions identically to {@link toXML()} except that + * it always uses closing tags (e.g. always @code@endcode, + * never @code