From 9a9c04512e5dcb77c7fe5d850e3f2a0250cc160e Mon Sep 17 00:00:00 2001 From: emkael Date: Wed, 18 Jan 2017 20:07:16 +0100 Subject: * Motor Sport Magazine feed provider --- lib/querypath/.gitignore | 14 + lib/querypath/API | 61 + lib/querypath/COPYING-MIT.txt | 22 + lib/querypath/CREDITS | 14 + lib/querypath/INSTALL | 80 + lib/querypath/Makefile | 30 + lib/querypath/README.md | 142 + lib/querypath/RELEASE | 168 + lib/querypath/build.xml | 590 +++ lib/querypath/composer.json | 16 + lib/querypath/config.doxy | 1631 ++++++++ lib/querypath/package.xml | 546 +++ lib/querypath/package_compatible.xml | 56 + lib/querypath/pear-summary.txt | 5 + lib/querypath/quickstart-guide.md | 113 + lib/querypath/src/QueryPath.php | 356 ++ lib/querypath/src/QueryPath/CSS/DOMTraverser.php | 775 ++++ .../src/QueryPath/CSS/DOMTraverser/PseudoClass.php | 421 ++ .../src/QueryPath/CSS/DOMTraverser/Util.php | 139 + lib/querypath/src/QueryPath/CSS/EventHandler.php | 171 + lib/querypath/src/QueryPath/CSS/InputStream.php | 57 + .../src/QueryPath/CSS/NotImplementedException.php | 15 + lib/querypath/src/QueryPath/CSS/ParseException.php | 15 + lib/querypath/src/QueryPath/CSS/Parser.php | 575 +++ .../src/QueryPath/CSS/QueryPathEventHandler.php | 1424 +++++++ lib/querypath/src/QueryPath/CSS/Scanner.php | 306 ++ lib/querypath/src/QueryPath/CSS/Selector.php | 144 + lib/querypath/src/QueryPath/CSS/SimpleSelector.php | 138 + lib/querypath/src/QueryPath/CSS/Token.php | 60 + lib/querypath/src/QueryPath/CSS/Traverser.php | 38 + lib/querypath/src/QueryPath/DOMQuery.php | 3993 ++++++++++++++++++ lib/querypath/src/QueryPath/Entities.php | 162 + lib/querypath/src/QueryPath/Exception.php | 13 + lib/querypath/src/QueryPath/Extension.php | 94 + lib/querypath/src/QueryPath/Extension/QPXML.php | 211 + lib/querypath/src/QueryPath/Extension/QPXSL.php | 76 + lib/querypath/src/QueryPath/ExtensionRegistry.php | 130 + lib/querypath/src/QueryPath/IOException.php | 20 + lib/querypath/src/QueryPath/Options.php | 84 + 
lib/querypath/src/QueryPath/ParseException.php | 44 + lib/querypath/src/QueryPath/Query.php | 12 + lib/querypath/src/QueryPath/QueryPathIterator.php | 33 + lib/querypath/src/documentation.php | 261 ++ lib/querypath/src/qp.php | 82 + lib/querypath/src/qp_functions.php | 216 + lib/querypath/test/DOMTraverserTest.xml | 35 + .../test/Tests/QueryPath/CSS/DOMTraverserTest.php | 357 ++ .../test/Tests/QueryPath/CSS/ParserTest.php | 520 +++ .../test/Tests/QueryPath/CSS/PseudoClassTest.php | 828 ++++ .../QueryPath/CSS/QueryPathEventHandlerTest.php | 1439 +++++++ .../test/Tests/QueryPath/CSS/SelectorTest.php | 120 + .../test/Tests/QueryPath/CSS/TokenTest.php | 23 + .../test/Tests/QueryPath/CSS/UtilTest.php | 51 + .../test/Tests/QueryPath/DOMQueryTest.php | 1865 +++++++++ .../test/Tests/QueryPath/EntitiesTest.php | 54 + .../test/Tests/QueryPath/ExtensionTest.php | 153 + .../test/Tests/QueryPath/Extensions/QPXMLTest.php | 41 + .../test/Tests/QueryPath/Extensions/QPXSLTest.php | 60 + lib/querypath/test/Tests/QueryPath/OptionsTest.php | 60 + .../test/Tests/QueryPath/QueryPathTest.php | 56 + lib/querypath/test/Tests/QueryPath/TestCase.php | 22 + lib/querypath/test/Tests/QueryPath/XMLIshTest.php | 63 + lib/querypath/test/advanced.xml | 14 + lib/querypath/test/amplify.xml | 4370 ++++++++++++++++++++ lib/querypath/test/coverage.sh | 6 + lib/querypath/test/data.html | 15 + lib/querypath/test/data.xml | 15 + lib/querypath/test/html.xml | 15 + lib/querypath/test/no-writing.xml | 2 + lib/querypath/test/runTests.sh | 8 + lib/querypath/test/test.php | 153 + providers/Motorsport.php | 116 + 72 files changed, 24014 insertions(+) create mode 100644 lib/querypath/.gitignore create mode 100644 lib/querypath/API create mode 100644 lib/querypath/COPYING-MIT.txt create mode 100755 lib/querypath/CREDITS create mode 100644 lib/querypath/INSTALL create mode 100644 lib/querypath/Makefile create mode 100644 lib/querypath/README.md create mode 100644 lib/querypath/RELEASE create mode 100644 
lib/querypath/build.xml create mode 100644 lib/querypath/composer.json create mode 100644 lib/querypath/config.doxy create mode 100644 lib/querypath/package.xml create mode 100644 lib/querypath/package_compatible.xml create mode 100644 lib/querypath/pear-summary.txt create mode 100644 lib/querypath/quickstart-guide.md create mode 100644 lib/querypath/src/QueryPath.php create mode 100644 lib/querypath/src/QueryPath/CSS/DOMTraverser.php create mode 100644 lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php create mode 100644 lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php create mode 100644 lib/querypath/src/QueryPath/CSS/EventHandler.php create mode 100644 lib/querypath/src/QueryPath/CSS/InputStream.php create mode 100644 lib/querypath/src/QueryPath/CSS/NotImplementedException.php create mode 100644 lib/querypath/src/QueryPath/CSS/ParseException.php create mode 100644 lib/querypath/src/QueryPath/CSS/Parser.php create mode 100644 lib/querypath/src/QueryPath/CSS/QueryPathEventHandler.php create mode 100644 lib/querypath/src/QueryPath/CSS/Scanner.php create mode 100644 lib/querypath/src/QueryPath/CSS/Selector.php create mode 100644 lib/querypath/src/QueryPath/CSS/SimpleSelector.php create mode 100644 lib/querypath/src/QueryPath/CSS/Token.php create mode 100644 lib/querypath/src/QueryPath/CSS/Traverser.php create mode 100644 lib/querypath/src/QueryPath/DOMQuery.php create mode 100644 lib/querypath/src/QueryPath/Entities.php create mode 100644 lib/querypath/src/QueryPath/Exception.php create mode 100644 lib/querypath/src/QueryPath/Extension.php create mode 100644 lib/querypath/src/QueryPath/Extension/QPXML.php create mode 100644 lib/querypath/src/QueryPath/Extension/QPXSL.php create mode 100644 lib/querypath/src/QueryPath/ExtensionRegistry.php create mode 100644 lib/querypath/src/QueryPath/IOException.php create mode 100644 lib/querypath/src/QueryPath/Options.php create mode 100644 lib/querypath/src/QueryPath/ParseException.php create mode 100644 
lib/querypath/src/QueryPath/Query.php create mode 100644 lib/querypath/src/QueryPath/QueryPathIterator.php create mode 100644 lib/querypath/src/documentation.php create mode 100644 lib/querypath/src/qp.php create mode 100644 lib/querypath/src/qp_functions.php create mode 100644 lib/querypath/test/DOMTraverserTest.xml create mode 100644 lib/querypath/test/Tests/QueryPath/CSS/DOMTraverserTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/CSS/ParserTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/CSS/PseudoClassTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/CSS/QueryPathEventHandlerTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/CSS/SelectorTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/CSS/TokenTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/CSS/UtilTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/DOMQueryTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/EntitiesTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/ExtensionTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/Extensions/QPXMLTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/Extensions/QPXSLTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/OptionsTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/QueryPathTest.php create mode 100644 lib/querypath/test/Tests/QueryPath/TestCase.php create mode 100644 lib/querypath/test/Tests/QueryPath/XMLIshTest.php create mode 100644 lib/querypath/test/advanced.xml create mode 100644 lib/querypath/test/amplify.xml create mode 100644 lib/querypath/test/coverage.sh create mode 100644 lib/querypath/test/data.html create mode 100644 lib/querypath/test/data.xml create mode 100644 lib/querypath/test/html.xml create mode 100644 lib/querypath/test/no-writing.xml create mode 100755 lib/querypath/test/runTests.sh create mode 100755 lib/querypath/test/test.php create mode 100644 providers/Motorsport.php diff 
--git a/lib/querypath/.gitignore b/lib/querypath/.gitignore new file mode 100644 index 0000000..02a5796 --- /dev/null +++ b/lib/querypath/.gitignore @@ -0,0 +1,14 @@ +bin/build +bin/local +bin/*.tgz +bin/*.zip +*.tmproj +releases +dist +test/coverage +test/reports +test/db +docs/* +doc/* +bin/querypath-200x333.png +test/fakepear diff --git a/lib/querypath/API b/lib/querypath/API new file mode 100644 index 0000000..31c9a93 --- /dev/null +++ b/lib/querypath/API @@ -0,0 +1,61 @@ +API Changes in 3.0.0 + +This is a major update. See the main changes for details. + +API Changes in 2.1.1 + + * NEW METHOD: document() returns the DOMDocument + * BUG FIX: Issue #10 has been re-fixed to correctly collapse certain empty tags. + * BUG FIX: Issue #10 has been re-fixed to correctly escape JavaScript for browsers. + * BUG FIX: Issue #47 has been fixed to only remove XML declaration, but leave DOCTYPE. + * NEW ARGUMENT: xpath() now supports $options, which includes the ability to set a namespace. + +API Changes in 2.1 + +All changes are documented in their respective doc blocks. So take a +look at http://api.querypath.org to learn more. + +New Functions + * The `htmlqp()` method has been added for parsing icky HTML. Use + this for web scraping. + +Altered Functions + * The qp() function now supports the following new options: + - convert_to_encoding + - convert_from_encoding + - strip_low_ascii + - use_parser + +New Methods + * attach()/detach() + * has() + * emptyElement() + * even()/odd() + * first()/last() + * firstChild()/lastChild() + * nextUntil()/prevUntil() + * parentsUntil() + * encodeDataURL() + * dataURL() + * filterPreg() + * textBefore()/textAfter() + +Altered Methods + * css() has been changed to allow subsequent calls + to modify the style attribute (issue #28) + * attr() has been changed. If it is called with no + arguments, it now returns all attributes. + +New CSS Selectors Behavior + + * :contains-exactly() performs as :contains() used to perform. 
+ +Altered CSS Selectors Behavior + + * The star operator (*) is now non-greedy, per spec. Before, the star would match + any descendants. Now it will only match children. + * :contains() now does substring matching instead of exact matching. This conforms + to jQuery's behavior. + * Quotes are now checked carefully before being stripped from pseudo-class values. + * Issue #40 identified a potential infinite looping problem on poorly formed selectors. + This was fixed. diff --git a/lib/querypath/COPYING-MIT.txt b/lib/querypath/COPYING-MIT.txt new file mode 100644 index 0000000..3e26983 --- /dev/null +++ b/lib/querypath/COPYING-MIT.txt @@ -0,0 +1,22 @@ +QueryPath: Find your way +Matt Butcher +Copyright (C) 2009-2014 Matt Butcher +Copyright (C) 2015 Google + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/lib/querypath/CREDITS b/lib/querypath/CREDITS new file mode 100755 index 0000000..1c4a940 --- /dev/null +++ b/lib/querypath/CREDITS @@ -0,0 +1,14 @@ +Matt Butcher [technosophos] (lead) +Emily Brand [eabrand] (developer) +Woody Gilk [shadowhand] (contributor) +Xandar Guzman [theshadow] (contributor) +Bobby Jack [fiveminuteargument] (contributor) +Steven Lischer [TomorrowToday] (contributor) +GDMac [GDMac] (contributor) +Bill Ortell [billortell] (contributor) +hakre [hakre] (contributor) +katzwebservices [katzwebservices] (contributor) +Markus Kalkbrenner [mkalkbrenner] (contributor) +Akihiro Yamanoi [noisan] (contributor) +Sandeep Shetty [sandeepshetty] (contributor) +Zemistr [Zemistr] (contributor) diff --git a/lib/querypath/INSTALL b/lib/querypath/INSTALL new file mode 100644 index 0000000..e6f4555 --- /dev/null +++ b/lib/querypath/INSTALL @@ -0,0 +1,80 @@ +INSTALLING QUERYPATH +============================== + +There are two distributions of QueryPath: + +1. The full version, which includes source, documentation, unit tests, +and examples. + +2. The minimal version, which includes only the source code in +compacted form. + +INSTALLING THE MINIMAL VERSION +============================== +To install QueryPath-minimal: +- Extract the contents of the archive: + + tar -zxvf QueryPath-2.0-minimal.tgz + +- Move the extracted directory to the desired location. Generally, + it is best to rename this folder 'QueryPath'. + + mv QueryPath-2.0-minimal myproject/QueryPath + + (Alternately, you may wish to install QueryPath in a location available on + your PHP library path for use in all applications on your system.) + +- Include QueryPath in your scripts: + + require 'QueryPath/QueryPath.php'; + +- The minimal package does not contain any documentation. To view + the documentation online, go to http://api.querypath.org. + +INSTALLING THE FULL VERSION +============================== +To install QueryPath: + +- Extract the contents of the archive. 
+ + tar -zxvf QueryPath-2.0.tgz + +- Copy the src/QueryPath directory to the desired location. + + cp QueryPath-2.0/src/QueryPath myproject + +- Include QueryPath/QueryPath.php in your PHP scripts + + require 'QueryPath/QueryPath.php'; + +The structure of the full version: + +/ -- Base information about the library + src/ -- The source (PHP) code. Include the contents of this directory in your + applications. + test/ -- Unit tests for QueryPath. You can run these with PHPUnit. + examples/ -- Examples scripts written with QueryPath. + docs/ -- Full API documentation in HTML format. You can regenerate this from + the source using PHPDocumentor. + tutorials/ -- DocBook additions to the QueryPath PHPDocumentor data. An HTML + version of this is included in the docs/ directory. The files here are + probably only useful when re-generating the API documentation. + + +INSTALLING FROM GitHub +============================== + +The complete source code and all utilities for QueryPath are located in a GitHub +repository. If you plan on contributing back to QueryPath, you will be able to +achieve the most by working from the GitHub source. Here's how: + +1. Obtain the source code from here: http://github.com/technosophos/querypath + +2. Install PHPDocumentor, PHPUnit, and Phing. XDebug is strongly recommended for + doing coverage analysis. These tools are used to auto-generate + documentation, run unit tests, and manage project builds. + +3. Run 'phing info' from the base QueryPath checkout. This will tell you how to + perform various tasks with QueryPath. + +For more information, visit http://querypath.org. 
\ No newline at end of file diff --git a/lib/querypath/Makefile b/lib/querypath/Makefile new file mode 100644 index 0000000..7267813 --- /dev/null +++ b/lib/querypath/Makefile @@ -0,0 +1,30 @@ +PROJ := 'QueryPath' +SRCDIR := src +TESTS := test/Tests +VERSION := 'DEV' +DATE := `date "+%Y%m%d"` + +VFILES = src/HPCloud + +docs : + @cat ./config.doxy | sed 's/-UNSTABLE%/$(VERSION)/' | doxygen - + +test : + phpunit --verbose --color --exclude-group=deprecated $(TESTS); + +test-group : + phpunit --verbose --color --group=$(GROUP) $(TESTS); + +fulltest: + phpunit --color $(TESTS) + +lint : $(SRCDIR) + find $(SRCDIR) -iname *.php -exec php -l {} ';' + +dist: tar + +tar: + @echo $(PROJ)-$(VERSION)-$(DATE).tgz + # @tar -zcvf $(PROJ)-$(VERSION)-$(DATE).tgz $(SRCDIR) + +.PHONY: docs test dist tar lint diff --git a/lib/querypath/README.md b/lib/querypath/README.md new file mode 100644 index 0000000..fa4e885 --- /dev/null +++ b/lib/querypath/README.md @@ -0,0 +1,142 @@ +# QueryPath: Find your way. + +Authors: Matt Butcher (lead), Emily Brand, and many others + +[Website](http://querypath.org) | +[API Docs](http://api.querypath.org/docs) | +[VCS and Issue Tracking](http://github.com/technosophos/querypath) | +[Support List](http://groups.google.com/group/support-querypath) | +[Developer List](http://groups.google.com/group/devel-querypath) | +[Pear channel](http://pear.querypath.org) | + +This package is licensed under an MIT license (COPYING-MIT.txt). + +## At A Glance + +QueryPath is a jQuery-like library for working with XML and HTML +documents in PHP. It now contains support for HTML5 via the +[HTML5-PHP project](https://github.com/Masterminds/html5-php). 
+ +### Getting Started + +Assuming you have successfully installed QueryPath via Composer, you can +parse documents like this: + +``` +require_once "vendor/autoload.php"; + +// HTML5 (new) +$qp = html5qp("path/to/file.html"); + +// Legacy HTML via libxml +$qp = htmlqp("path/to/file.html"); + +// XML or XHTML +$qp = qp("path/to/file.html"); + +// All of the above can take string markup instead of a file name: +$qp = qp("") + +``` + +But the real power comes from chaining. Check out the example below. + +### Example Usage + +Say we have a document like this: +```xml + + + + + + + + +
onetwothree
fourfivesix
+``` + +And say that the above is stored in the variable `$xml`. Now +we can use QueryPath like this: + +```php +attr('foo', 'bar'); + +// Print the contents of the third TD in the second row: +print qp($xml, '#row2>td:nth(3)')->text(); + +// Append another row to the XML and then write the +// result to standard output: +qp($xml, 'tr:last')->after('')->writeXML(); + +?> +``` + +(This example is in `examples/at-a-glance.php`.) + +With over 60 functions and robust support for chaining, you can +accomplish sophisticated XML and HTML processing using QueryPath. + +## QueryPath Installers + +The preferred method of installing QueryPath is via [Composer](http://getcomposer.org). + +You can also download the package from GitHub. + +### Composer (Preferred) + +To add QueryPath as a library in your project, add this to the 'require' +section of your `composer.json`: + +```json +{ + "require": { + "querypath/QueryPath": ">=3.0.0" + } +} +``` + +Then run `php composer.phar install` in that directory. + +To stay up to date on stable code, you can use `dev-master` instead of `>=3.0.0`. + +### Manual Install + +You can either download a stable release from the +[GitHub Tags page](https://github.com/technosophos/querypath/tags) +or you can use `git` to clone +[this repository](http://github.com/technosophos/querypath) and work from +the code. + +## Including QueryPath + +As of QueryPath 3.x, QueryPath uses the Composer autoloader if you +installed with composer: +```php + +``` + +Without Composer, you can include QueryPath like this: + +```php + +``` + +QueryPath can also be compiled into a Phar and then included like this: + +```php + +``` + +From there, the main functions you will want to use are `qp()` +(alias of `QueryPath::with()`) and `htmlqp()` (alias of +`QueryPath::withHTML()`). Start with the +[API docs](http://api.querypath.org/docs). 
diff --git a/lib/querypath/RELEASE b/lib/querypath/RELEASE new file mode 100644 index 0000000..68a9d38 --- /dev/null +++ b/lib/querypath/RELEASE @@ -0,0 +1,168 @@ +# RELEASE NOTES + +3.0.5 (unstable) + + +3.0.4 +- Addition of namespace fetching method ns(). +- Various fixes +- Basic support for HTML5 via Masterminds\HTML5 +- Fixed #164 + +3.0.3 + +- Bug #141: Fixed :gt and :lt pseudoclasses (thanks to lsv) +- Bug #124: Fixed inheritance issue with late static binding (via noisan) +- Bug #126: text(string) was not updating all matched elements (via noisan) +- Bug #127: wrapInner() was mangling the HTML if matched element set was greater than 1 (via noisan) +- Bug #128: Improved jQuery compatibility with wrap*() *pend(), before(), after(), etc. (via noisan) + +3.0.2 + +- Bug #112: children() was not correctly restricting filters. +- Bug #108: QueryPath\Query interface was too restrictive. (via mkalkbrenner) +- Feature #106: :contains() is now case-insensitive. (via katzwebservices) + +3.0.1 + +- Issue #100: qp.php divided into qp.php and qp_functions.php. Composer now includes qp(). (via hakre) +- Issue #102: Added QueryPath.VERSION_MAJOR, changed QueryPath.VERSION +- Issue #97: Rewrite children() and filter() to be a little more efficient. + +3.0.0 + +** REALLY IMPORTANT CHANGE:** To match jQuery, and to correctly implement + a bottom-up parser, the following will no longer work: + qp($html, 'li')->find(':root ul'); + You can no longer use find() to ever, ever move UP the document tree + (like back to the root from somewhere in the tree). You MUST use + top() to do this now. This is how jQuery works, and making this + minor change makes things much faster. + +** REALLY IMPORTANT CHANGE:** Many "destructive" operations now return a + new QueryPath object. This mirrors jQuery's behavior. + + THAT MEANS: find() now works like jQuery find(), branch() is + deprecated. findInPlace() does what find() used to do. + +1. removeAll() now works as it does in recent jQuery. 
Thanks to GDMac + (issues #77 #78) for the fix. +2. Refactored to use namespaces. +3. Refactored to be PSR-0 compliant. +4. Now uses Composer. +5. The traversal mechanism is now bottom-up, which means QueryPath is + faster. +6. Issue #83: Fixed broken forloop in techniques.php. Thanks to BillOrtell for + the fix. +7. ID-based searches no longer guarantee that only one element will be returned. + This accommodates XML documents that may use 'id' in a way different than + HTML. +8. The base CSS Traverser is now optimized for selectors that use ID or + class, but no element. +9. Pseudo-element behavior has been rewritten to better conform to the + standard. Using a pseudo-element no longer changes the match. Rather, + it checks to see if the condition is met by the present element, and + returns TRUE if it does. This means we do not need special case logic + to deal with text fragments. +10. :x-root, :x-reset, and :scope are now implemented using the + same algorithm (they are, in other words, aliases). Since + :scope is part of CSS 4, you should use that. +11. Support for the following CSS 4 Selectors features has been added: + - :matches() pseudoclass, which behaves the way :has() behaves. + - :any-link + - :local-link (with some restrictions, as it does not know what the real + URL of the document is). + - :scope (see above) +12. Traversing UP the DOM tree with find() is no longer allowed. Use top(). +13. :first is an alias of :nth(1) instead of :first-of-type. This follows + jQuery now. +14. eachLambda() is now deprecated. It WILL be removed in the future. +15. **Extensions:** QPList, QPTPL, and QPDB have all been moved to the new + project QueryPath-Ext. + +2.1.3 + +1. QueryPath Templates have gotten an overhaul from TomorrowToday (Issue #59). + Templates now support attributes. + +2.1.2: + +1. Fixed the case where remove() caused an error when no items were found + to remove (issue #63). Thanks marktheunissen for the bug report and fix. + +2. 
New XML extensions to deal with adding namespaced elements (#64). Thanks to + theshadow for contributing an entire extension, and to farinspace for + detailed experiments with QP and XML namespaces. + +3. The adjacent CSS selector has been modified to ignore text elements. This + seems to be in line with the spec, but I am not 100% sure. Thanks to + fiveminuteargument for the patch. + +2.1.1: + +1. The xhtml() and writeXHTML() methods now correctly escape JS/CSS and also correctly + fold some tags into unaries while keeping other empty tags. See issues #10, #47. + Thanks to Alex Lawrence for his input. + +2. The method document() has been added. Thanks to Alex Lawrence for suggesting this + addition. + +3. The fetch_rss.php example created broken HREFs in some cases. Thanks to yaph for + the patch. + +4. The xpath() method now supports setting default namespaces. Thanks to Xavier Prud'homme + for a patch. + +5. The remove() method was fixed (issue #55) to now correctly return a QueryPath with + just the removed nodes, while not altering the base QueryPath object. Thanks to MarcusX + for finding and reporting the problem. + +6. Added childrenText() convenience method. (Safe -- no changes to existing functions.) + Thanks to Xavatar for suggestion and proofing initial code. + +7. Fixed bad character stripping in htmlqp() (Issue #58, #52) so that meaningful whitespace + is no longer stripped prior to parsing. Thanks to NorthernRaven for detailed report + and help testing and debugging. + +8. Fixed broken :nth-of-type pseudo-class (Issue #57). Thanks to NorthernRaven for the + detailed report and help debugging. + +9. Fixed broken an+b rule handling in the special case '-n+b'. Thanks to NorthernRaven for + reporting and helping out. + +10. Xinclude support has been added via the xinclude() method on QueryPath. Thanks to Crell + for the suggestion and to sdboyer for help (Issue #50). + +11. QueryPath now implements Countable, which means you can do `count(qp($xml, 'div'))`. 
The + size() function has been marked deprecated. + +12. is() now takes both DOMNodes and Traversables (including QueryPath) as an argument. See + issue #53. + +13. The dirty_html.php example (contributed by Emily Brand, thanks!) is now fixed. Thanks to + MartyIX for tracking down the issue (#59). + +2.1.0: +Big Changes: + +1. There is now an `htmlqp()` function that parses crufty HTML in a far +more reliable way than `qp()`. Use this instead of any variant of +the older `@qp()` setup. + +2. The API has been brought into alignment with jQuery 1.4. See +API-2.1.0 for details. + +3. This release was driven substantially by eabrand's GSOC 2010 +contributions. Thanks, Emily! + +4. There are now Phar and PEAR packages available. Go to +http://pear.querypath.org for PEAR packages. + +5. The minimal QP distribution is no longer minified, as it reportedly +causes XDebug to crash. + +7. Data URLs are now supported. QueryPath can now embed images directly +into HTML and XML this way. + +8. Documentation is now in Doxygen instead of PhpDocumentor. Thanks +to Matt Farina and Kevin O'Brien for their input. diff --git a/lib/querypath/build.xml b/lib/querypath/build.xml new file mode 100644 index 0000000..061fe19 --- /dev/null +++ b/lib/querypath/build.xml @@ -0,0 +1,590 @@ + + + + + +To build QueryPath, run: + + phing build + +This will create a generic dev release of QueryPath and copy the releasable files to the dist/ directory. All documentation will be generated, and both a minimal and full version of the code will be generated. The non-compressed files will be available for inspection in bin/build/. + +IMPORTANT: The full version of the package does not include build scripts (including this file). The only way to obtain the entire QueryPath development environment is to check it out from http://github.com/technosophos/querypath. + +A numbered release can be built with: + + phing build -Dversion=2.1-Alpha1 + +These are the basic tasks we anticipate performing with phing. 
However, the build script supports the running of several other tasks which may help you when debugging or developing QueryPath. Important ones are listed below. A complete list can be obtained by running 'phing -l' in this directory. + +To generate docs, do: + + phing doc + +Documentation will be stored in docs/. You can start with docs/index.html. + +To run unit tests, do: + + phing test + +The above will generate HTML test results which will be placed in test/reports/. If you wish to run the test and print the results directly the the command line, you should run 'phing ftest' instead. + +To run coverage analysis, do: + + phing coverage + +This will create HTML pages describing code coverage. The coverage analysis will be available in test/coverage + +To print this message, do: + + phing info + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${releasedir} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${releasedir} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Setting version string to ${version}. 
+ + + + + + + ${releasedir} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${packagedir}/QueryPath-${version}.tgz + + + + + + + + + + + stable + + + + snapshot + + + + + alpha + + + + + + beta + + + + ${stability} + + diff --git a/lib/querypath/composer.json b/lib/querypath/composer.json new file mode 100644 index 0000000..63e1c35 --- /dev/null +++ b/lib/querypath/composer.json @@ -0,0 +1,16 @@ +{ + "name": "querypath/QueryPath", + "type": "library", + "description": "HTML/XML querying (CSS 4 or XPath) and processing (like jQuery)", + "homepage": "https://github.com/technosophos/querypath", + "license": "MIT", + "keywords": ["xml", "html", "css", "jquery", "xslt"], + "require" : { + "php" : ">=5.3.0", + "masterminds/html5": "2.*" + }, + "autoload": { + "psr-0": {"QueryPath": "src/"}, + "files": ["src/qp_functions.php"] + } +} diff --git a/lib/querypath/config.doxy b/lib/querypath/config.doxy new file mode 100644 index 0000000..4a71ea4 --- /dev/null +++ b/lib/querypath/config.doxy @@ -0,0 +1,1631 @@ +# Doxyfile 1.7.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. 
See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = QueryPath + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = -UNSTABLE% + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. 
+ +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. 
+ +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before file names in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description.
If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 2 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. 
+ +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. 
+ +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen to replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. 
This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation.
+ +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. 
+# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. 
+ +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. 
This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. 
+ +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled.
+ +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = ./src ./test ./examples + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding.
Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = *.php \ *.module \ *.inc \ *.install \ *.js \ *.theme + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring.
Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = ./examples + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files.
+ +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = NO + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. Otherwise they will link to the documentation. 
+ +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. 
+ +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. 
A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox, +# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information.
+ +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp.
+ +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. 
For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters +# (Qt Help Project / Custom Filters). + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# See http://doc.trolltech.com/qthelpproject.html#filter-attributes +# (Qt Help Project / Filter Attributes). + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation.
+ +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = YES + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output.
The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantage is that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory.
+ +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML.
+ +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = YES + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. 
+ +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. 
+ +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. 
+ +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor.
+ +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command.
Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will write a font called FreeSans.ttf to the output +# directory and reference it in all dot files that doxygen generates. This +# font does not include all possible unicode characters however, so when you need +# these (or just want a differently looking font) you can specify the font name +# using DOT_FONTNAME. You need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = FreeSans.ttf + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt.
+ +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct group dependencies. + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file.
+ +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif. +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph.
If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lie farther from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs.
+ +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/lib/querypath/package.xml b/lib/querypath/package.xml new file mode 100644 index 0000000..017d150 --- /dev/null +++ b/lib/querypath/package.xml @@ -0,0 +1,546 @@ + + + QueryPath + pear.querypath.org + QueryPath: A library for working with XML, HTML, and web services. + + +QueryPath implements much of the jQuery API in PHP, and supports CSS +selector queries, jQuery's traversal and manipulation APIs, and +adds many other useful methods. Learn more at http://querypath.org. + + Matt Butcher + technosophos + matt@aleph-null.tv + yes + + + Emily Brand + eabrand + emily@example.com + yes + + + Woody Gilk + shadowhand + woody@wingsc.com + yes + + 2010-10-21 + + + 2.1.0dev2 + 2.1.0dev2 + + + devel + devel + + LGPL or MIT (Your Choice) + RELEASE NOTES + +Big Changes: + +1. There is now an `htmlqp()` function that parses crufty HTML in a far +more reliable way than `qp()`. Use this instead of any variant of +the older `@qp()` setup. + +2. The API has been brought into alignment with jQuery 1.4. See +API-2.1.0 for details. + +3. This release was driven substantially by eabrand's GSOC 2010 +contributions. Thanks, Emily! + +4. There are now Phar and PEAR packages available. Go to +http://pear.querypath.org for PEAR packages. + +5. The minimal QP distribution is no longer minified, as it reportedly +causes XDebug to crash. + +7. Data URLs are now supported. QueryPath can now embed images directly +into HTML and XML this way. + +8. Documentation is now in Doxygen instead of PhpDocumentor. Thanks +to Matt Farina and Kevin O'Brien for their input. + +Changes in 2.1 + +All changes are documented in their respective doc blocks. So take a +look at http://api.querypath.org to learn more. + +New Functions + * The `htmlqp()` method has been added for parsing icky HTML. 
Use + this for web scraping. + +Altered Functions + * The qp() function now supports the following new options: + - convert_to_encoding + - convert_from_encoding + - strip_low_ascii + - use_parser + +New Methods + * attach()/detach() + * has() + * emptyElement() + * even()/odd() + * first()/last() + * firstChild()/lastChild() + * nextUntil()/prevUntil() + * parentsUntil() + * encodeDataURL() + * dataURL() + * filterPreg() + +Altered Methods + * css() has been changed to allow subsequent calls + to modify the style attribute (issue #28) + * attr() has been changed. If it is called with no + arguments, it now returns all attributes. + +New CSS Selectors Behavior + + * :contains-exactly() performs as :contains() used to perform. + +Altered CSS Selectors Behavior + + * The star operator (*) is now non-greedy, per spec. Before, the star would match + any descendants. Now it will only match children. + * :contains() now does substring matching instead of exact matching. This conforms + to jQuery's behavior. + * Quotes are now checked carefully before being stripped from pseudo-class values. + * Issue #40 identified a potential infinite looping problem on poorly formed selectors. + This was fixed. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 5.2.0 + + + 2.0.0a1 + + + + + diff --git a/lib/querypath/package_compatible.xml b/lib/querypath/package_compatible.xml new file mode 100644 index 0000000..50789b7 --- /dev/null +++ b/lib/querypath/package_compatible.xml @@ -0,0 +1,56 @@ + + + QueryPath + pear.querypath.org + QueryPath: A library for working with XML, HTML, and web services. + + +QueryPath implements much of the jQuery API in PHP, and supports CSS +selector queries, jQuery's traversal and manipulation APIs, and +adds many other useful methods. Learn more at http://querypath.org. + + Matt Butcher + technosophos + matt@aleph-null.tv + yes + + + Emily Brand + eabrand + emily@example.com + yes + + + Woody Gilk + shadowhand + woody@wingsc.com + yes + + 2010-10-20 + + + 2.1.0beta3 + 2.1.0beta3 + + + beta + beta + + LGPL or MIT (your choice) + + + + + + + + 5.2.0 + + + 1.4.8 + + + + + + diff --git a/lib/querypath/pear-summary.txt b/lib/querypath/pear-summary.txt new file mode 100644 index 0000000..015119a --- /dev/null +++ b/lib/querypath/pear-summary.txt @@ -0,0 +1,5 @@ +QueryPath: A library for working with XML, HTML, and web services. 
+ +QueryPath implements much of the jQuery API in PHP, and supports CSS +selector queries, jQuery's traversal and manipulation APIs, and +adds many other useful methods. Learn more at http://querypath.org. \ No newline at end of file diff --git a/lib/querypath/quickstart-guide.md b/lib/querypath/quickstart-guide.md new file mode 100644 index 0000000..da1cfde --- /dev/null +++ b/lib/querypath/quickstart-guide.md @@ -0,0 +1,113 @@ +# QueryPath QuickStart + +This short guide is intended to help you get started with QueryPath 3. + +## Using QueryPath in Your Project + +To use QueryPath inside of your own application, you will need to make sure that PHP can find the QueryPath library. There are a few possible ways of doing this. The first is to use an autoloader. The second is to include QueryPath manually. We'll look briefly at each. + +### Autoloaders and QueryPath + +In recent times, PHP has standardized a method of automatically importing classes by name. This is often called [PSR-0 autoloading](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-0.md). Symfony, Composer, and many other PHP projects use PSR-0 autoloaders, and QueryPath should work with those. In addition, QueryPath has its own autoloader in `qp.php`. + +To use QueryPath's autoloader, all you need to do is include `qp.php`. This will detect if another autoloader is already in place, and if not, it will configure its own autoloader: + +```{php} +text(); + +print htmlqp('http://technosophos.com', 'title')->text(); +?> +``` + +The above illustrates the requiring of QueryPath's autoloader. Note that in that case we don't need to do anything else to get the `QueryPath` class or the `htmlqp()` functions. + +QueryPath also ships with [Composer](http://getcomposer.org) support. Composer provides PSR-0 autoloading. To use Composer's autoloader, you can do this: + +```{php} +text(); + +// THIS DOESN'T WORK! 
+// print htmlqp('http://technosophos.com', 'title')->text(); +?> +``` + +Notice, though, that the `qp()` and `htmlqp()` functions *will not work* with this method. Why? Because PHP's autoloader does not know about functions. It operates on classes only. So you can use QueryPath's Object-Oriented API (`QueryPath::with()`, `QueryPath::withHTML()`, `QueryPath::withXML()`), but not the `qp()` and `htmlqp()` functions. If you want to use those, too, simply include `qp.php`: + +```{php} +text(); + +// This works because qp.php was imported +print htmlqp('http://technosophos.com', 'title')->text(); +?> +``` + +## A Simple Example + +So far, we have seen a few variations of the same program. Let's learn what it does. Here's the program: + +```{php} +text(); + +print htmlqp('http://technosophos.com', 'title')->text(); +?> +``` + +This does the same thing two different ways. Let's look at line 3: + +```{php} +text(); +?> +``` + +This line does three things: + +1. It loads and parses the HTML document it finds at `http://technosophos.com`. QueryPath can load documents locally and remotely. It can also load strings of HTML or XML, as well as `SimpleXML` objects and `DOMDocument` objects. It should be easy to get your HTML or XML loaded into QueryPath. +2. It performs a search for the tag named `title`. QueryPath uses CSS 4 Selectors (as the current draft stands) as a query language -- just like jQuery and CSS. (If you prefer XPath, check out the `xpath()` method on QueryPath). Of course, `title` is a very basic selector. You can do more advanced selectors like `#bar-one table>tr:odd td>a:first-of-type()`, which looks for the element with ID `bar-one` and then fetches every odd row from its table, then from each cell in the row, it finds the first hyperlink. +3. Finally, the example calls `text()`, which will fetch the text content of the first element it's found (in this case, the `title` tag in the HTML head). If no title is found, this will return an empty string. 
Otherwise it will return the text of that tag. + +QueryPath has well over 60 methods like `text()`. Some are for navigating, like `top()`, `children()`, `next()`, and `prev()`. Some are for manipulating the parts of an HTML or XML element, like `attr()`. Others are for doing sophisticated finding and filtering operations (`find()`, `filter()`, `filterCallback()`, `map()`, and so on). And, of course, there are methods for modifying the document (`append()`, `before()`, `after()`, `attr()`, `text()`, and many more). + +The goal of QueryPath is to make it easy for you to process XML and HTML documents. There may be a lot of methods to learn (just like jQuery), but those methods are there to make your life simpler. + +## HTML vs XML + +When QueryPath was first introduced, it did not distinguish between XML and HTML documents. At that time, momentum was behind XHTML, and it looked like the future was XML. But over time, it has become abundantly clear that HTML documents cannot be treated as XML during parsing and processing, or during output. + +So there are now separate parser functions for HTML and XML -- as well as a generic parser function that inspects the document and attempts to determine whether it is XML or HTML: + +* `QueryPath::withXML()`: This *only* handles XML documents. If you give it an HTML document, it will attempt to force XML parsing on that document. +* `htmlqp()`, `QueryPath::withHTML()`: This will force QueryPath to use the HTML parser. It will also make a number of adjustments to QueryPath to accommodate common HTML breakages. +* `qp()`, `QueryPath::with()`: This will attempt to guess whether the document is XML or HTML. In general, it favors XML slightly. Guessing may be done by… + - File extension + - XML declaration + - The suggestions made by any options passed into the document + +###… And Character Encoding + +XML suggests that all documents be encoded as UTF-8. 
Most HTML documents are encoded using one of the ISO specifications (typically ISO-8859-1). And web servers are often misconfigured to report that documents are using one character set when they are actually using another. + +To work around all of these issues, QueryPath attempts to convert documents automatically. It does this using PHP's internal character detection libraries. But sometimes it guesses wrong. You can adjust this feature manually by passing in language settings in the `$options` array. See the documentation on `qp()` for details. + + +## Where to go from here + +* [QueryPath.org](http://querypath.org) has pointers to other resources. +* [The API docs](http://api.querypath.org) have detailed explanations of every single part of QueryPath. + + \ No newline at end of file diff --git a/lib/querypath/src/QueryPath.php b/lib/querypath/src/QueryPath.php new file mode 100644 index 0000000..2013377 --- /dev/null +++ b/lib/querypath/src/QueryPath.php @@ -0,0 +1,356 @@ +'; + * + * // Procedural call a la jQuery: + * $qp = qp($xml, '#myID'); + * $qp->append('')->writeHTML(); + * + * // Object-oriented version with a factory: + * $qp = QueryPath::with($xml)->find('#myID') + * $qp->append('')->writeHTML(); + * ?> + * @endcode + * + * The above would print (formatted for readability): + * @code + * + * + * + * + * + * + * + * + * @endcode + * + * ## Discovering the Library + * + * To gain familiarity with QueryPath, the following three API docs are + * the best to start with: + * + *- qp(): This function constructs new queries, and is the starting point + * for manipulating a document. htmlqp() is an alias tuned for HTML + * documents (especially old HTML), and QueryPath::with(), QueryPath::withXML() + * and QueryPath::withHTML() all perform a similar role, but in a purely + * object oriented way. + *- QueryPath: This is the top-level class for the library. It defines the + * main factories and some useful functions. 
+ *- QueryPath::Query: This defines all of the functions in QueryPath. When + * working with HTML and XML, the QueryPath::DOMQuery is the actual + * implementation that you work with. + * + * Included with the source code for QueryPath is a complete set of unit tests + * as well as some example files. Those are good resources for learning about + * how to apply QueryPath's tools. The full API documentation can be generated + * from these files using Doxygen, or you can view it online at + * http://api.querypath.org. + * + * If you are interested in building extensions for QueryPath, see the + * QueryPath and QueryPath::Extension classes. There you will find information on adding + * your own tools to QueryPath. + * + * QueryPath also comes with a full CSS 3 selector implementation (now + * with partial support for the current draft of the CSS 4 selector spec). If + * you are interested in reusing that in other code, you will want to start + * with QueryPath::CSS::EventHandler.php, which is the event interface for the parser. + * + * All of the code in QueryPath is licensed under an MIT-style license + * license. All of the code is Copyright, 2012 by Matt Butcher. + * + * @author M Butcher + * @license MIT + * @see QueryPath + * @see qp() + * @see http://querypath.org The QueryPath home page. + * @see http://api.querypath.org An online version of the API docs. + * @see http://technosophos.com For how-tos and examples. + * @copyright Copyright (c) 2009-2012, Matt Butcher. + * @version -UNSTABLE% (3.x.x) + * + */ + +use \Masterminds\HTML5; + +/** + * + */ +class QueryPath { + /** + * The version string for this version of QueryPath. + * + * Standard releases will be of the following form: .[.][-STABILITY]. + * + * Examples: + * - 2.0 + * - 2.1.1 + * - 2.0-alpha1 + * + * Developer releases will always be of the form dev-. + * + * @since 2.0 + */ + const VERSION = '3.0.x'; + + /** + * Major version number. 
+ * + * Examples: + * - 3 + * - 4 + * + * @since 3.0.1 + */ + const VERSION_MAJOR = 3; + + /** + * This is a stub HTML 4.01 document. + * + * Using {@link QueryPath::XHTML_STUB} is preferred. + * + * This is primarily for generating legacy HTML content. Modern web applications + * should use QueryPath::XHTML_STUB. + * + * Use this stub with the HTML familiy of methods (QueryPath::Query::html(), + * QueryPath::Query::writeHTML(), QueryPath::Query::innerHTML()). + */ + const HTML_STUB = ' + + + + Untitled + + + '; + + const HTML5_STUB = ' + + + Untitled + + + '; + + /** + * This is a stub XHTML document. + * + * Since XHTML is an XML format, you should use XML functions with this document + * fragment. For example, you should use {@link xml()}, {@link innerXML()}, and + * {@link writeXML()}. + * + * This can be passed into {@link qp()} to begin a new basic HTML document. + * + * Example: + * @code + * $qp = qp(QueryPath::XHTML_STUB); // Creates a new XHTML document + * $qp->writeXML(); // Writes the document as well-formed XHTML. + * @endcode + * @since 2.0 + */ + const XHTML_STUB = ' + + + + + Untitled + + + '; + + + public static function with($document = NULL, $selector = NULL, $options = array()) { + $qpClass = isset($options['QueryPath_class']) ? $options['QueryPath_class'] : '\QueryPath\DOMQuery'; + + $qp = new $qpClass($document, $selector, $options); + return $qp; + } + + public static function withXML($source = NULL, $selector = NULL, $options = array()) { + $options += array( + 'use_parser' => 'xml', + ); + return self::with($source, $selector, $options); + } + + public static function withHTML($source = NULL, $selector = NULL, $options = array()) { + // Need a way to force an HTML parse instead of an XML parse when the + // doctype is XHTML, since many XHTML documents are not valid XML + // (because of coding errors, not by design). 
+ + $options += array( + 'ignore_parser_warnings' => TRUE, + 'convert_to_encoding' => 'ISO-8859-1', + 'convert_from_encoding' => 'auto', + //'replace_entities' => TRUE, + 'use_parser' => 'html', + // This is stripping actually necessary low ASCII. + //'strip_low_ascii' => TRUE, + ); + return @self::with($source, $selector, $options); + } + + /** + * Parse HTML5 documents. + * + * This uses HTML5-PHP to parse the document. In actuality, this parser does + * a fine job with pre-HTML5 documents in most cases, though really old HTML + * (like 2.0) may have some substantial quirks. + * + * Supported Options + * Any options supported by HTML5-PHP are allowed here. Additionally, the + * following options have meaning to QueryPath. + * - QueryPath_class + * + * + * @param mixed $source + * A document as an HTML string, or a path/URL. For compatibility with + * existing functions, a DOMDocument, SimpleXMLElement, DOMNode or array + * of DOMNodes will be passed through as well. However, these types are not + * validated in any way. + * + * @param string $selector + * A CSS3 selector. + * + * @param array $options + * An associative array of options, which is passed on into HTML5-PHP. Note + * that the standard QueryPath options may be ignored for this function, + * since it uses a different parser. + * + * @return QueryPath + */ + public static function withHTML5($source = NULL, $selector = NULL, $options = array()) { + $qpClass = isset($options['QueryPath_class']) ? $options['QueryPath_class'] : '\QueryPath\DOMQuery'; + + if(is_string($source)) { + $html5 = new HTML5(); + if (strpos($source, '<') !== FALSE && strpos($source, '>') !== FALSE) { + $source = $html5->loadHTML($source); + } + else { + $source = $html5->load($source); + } + } + + $qp = new $qpClass($source, $selector, $options); + return $qp; + } + + /** + * Enable one or more extensions. + * + * Extensions provide additional features to QueryPath. To enable and + * extension, you can use this method. 
+ * + * In this example, we enable the QPTPL extension: + * @code + * + * @endcode + * + * Note that the name is a fully qualified class name. + * + * We can enable more than one extension at a time like this: + * + * @code + * + * @endcode + * + * @attention If you are not using an autoloader, you will need to + * manually `require` or `include` the files that contain the + * extensions. + * + * @param mixed $extensionNames + * The name of an extension or an array of extension names. + * QueryPath assumes that these are extension class names, + * and attempts to register these as QueryPath extensions. + */ + public static function enable($extensionNames) { + + if (is_array($extensionNames)) { + foreach ($extensionNames as $extension) { + \QueryPath\ExtensionRegistry::extend($extension); + } + } + else { + \QueryPath\ExtensionRegistry::extend($extensionNames); + } + } + + /** + * Get a list of all of the enabled extensions. + * + * This example dumps a list of extensions to standard output: + * @code + * + * @endcode + * + * @return array + * An array of extension names. + * + * @see QueryPath::ExtensionRegistry + */ + public static function enabledExtensions() { + return \QueryPath\ExtensionRegistry::extensionNames(); + } + + + + /** + * A static function for transforming data into a Data URL. + * + * This can be used to create Data URLs for injection into CSS, JavaScript, or other + * non-XML/HTML content. If you are working with QP objects, you may want to use + * dataURL() instead. + * + * @param mixed $data + * The contents to inject as the data. The value can be any one of the following: + * - A URL: If this is given, then the subsystem will read the content from that URL. THIS + * MUST BE A FULL URL, not a relative path. + * - A string of data: If this is given, then the subsystem will encode the string. + * - A stream or file handle: If this is given, the stream's contents will be encoded + * and inserted as data. 
+ * (Note that we make the assumption here that you would never want to set data to be + * a URL. If this is an incorrect assumption, file a bug.) + * @param string $mime + * The MIME type of the document. + * @param resource $context + * A valid context. Use this only if you need to pass a stream context. This is only necessary + * if $data is a URL. (See {@link stream_context_create()}). + * @return + * An encoded data URL. + */ + public static function encodeDataURL($data, $mime = 'application/octet-stream', $context = NULL) { + if (is_resource($data)) { + $data = stream_get_contents($data); + } + elseif (filter_var($data, FILTER_VALIDATE_URL)) { + $data = file_get_contents($data, FALSE, $context); + } + + $encoded = base64_encode($data); + + return 'data:' . $mime . ';base64,' . $encoded; + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/DOMTraverser.php b/lib/querypath/src/QueryPath/CSS/DOMTraverser.php new file mode 100644 index 0000000..be8c2af --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/DOMTraverser.php @@ -0,0 +1,775 @@ +psHandler = new \QueryPath\CSS\DOMTraverser\PseudoClass(); + $this->initialized = $initialized; + + // Re-use the initial splos + $this->matches = $splos; + + if (count($splos) != 0) { + $splos->rewind(); + $first = $splos->current(); + if ($first instanceof \DOMDocument) { + $this->dom = $first;//->documentElement; + } + else { + $this->dom = $first->ownerDocument;//->documentElement; + } + if (empty($scopeNode)) { + $this->scopeNode = $this->dom->documentElement; + } + else { + $this->scopeNode = $scopeNode; + } + } + + // This assumes a DOM. Need to also accomodate the case + // where we get a set of elements. + /* + $this->dom = $dom; + $this->matches = new \SplObjectStorage(); + $this->matches->attach($this->dom); + */ + } + + public function debug($msg) { + fwrite(STDOUT, PHP_EOL . $msg); + } + + /** + * Given a selector, find the matches in the given DOM. 
+ * + * This is the main function for querying the DOM using a CSS + * selector. + * + * @param string $selector + * The selector. + * @return \SPLObjectStorage + * An SPLObjectStorage containing a list of matched + * DOMNode objects. + */ + public function find($selector) { + // Setup + $handler = new Selector(); + $parser = new Parser($selector, $handler); + $parser->parse(); + $this->selector = $handler; + + //$selector = $handler->toArray(); + $found = $this->newMatches(); + foreach ($handler as $selectorGroup) { + // fprintf(STDOUT, "Selector group.\n"); + // Initialize matches if necessary. + if ($this->initialized) { + $candidates = $this->matches; + } + else { + //if (empty($selectorGroup)) { + // fprintf(STDOUT, "%s", print_r($handler->toArray(), TRUE)); + //} + $candidates = $this->initialMatch($selectorGroup[0], $this->matches); + //$this->initialized = TRUE; + } + + foreach ($candidates as $candidate) { + // fprintf(STDOUT, "Testing %s against %s.\n", $candidate->tagName, $selectorGroup[0]); + if ($this->matchesSelector($candidate, $selectorGroup)) { + // $this->debug('Attaching ' . $candidate->nodeName); + $found->attach($candidate); + } + } + } + $this->setMatches($found); + + + return $this; + } + + public function matches() { + return $this->matches; + } + + /** + * Check whether the given node matches the given selector. + * + * A selector is a group of one or more simple selectors combined + * by combinators. This determines if a given selector + * matches the given node. + * + * @attention + * Evaluation of selectors is done recursively. Thus the length + * of the selector is limited to the recursion depth allowed by + * the PHP configuration. This should only cause problems for + * absolutely huge selectors or for versions of PHP tuned to + * strictly limit recursion depth. + * + * @param object DOMNode + * The DOMNode to check. + * @param array Selector->toArray() + * The Selector to check. 
+ * @return boolean + * A boolean TRUE if the node matches, false otherwise. + */ + public function matchesSelector($node, $selector) { + return $this->matchesSimpleSelector($node, $selector, 0); + } + + /** + * Performs a match check on a SimpleSelector. + * + * Where matchesSelector() does a check on an entire selector, + * this checks only a simple selector (plus an optional + * combinator). + * + * @param object DOMNode + * The DOMNode to check. + * @param object SimpleSelector + * The Selector to check. + * @return boolean + * A boolean TRUE if the node matches, false otherwise. + */ + public function matchesSimpleSelector($node, $selectors, $index) { + $selector = $selectors[$index]; + // Note that this will short circuit as soon as one of these + // returns FALSE. + $result = $this->matchElement($node, $selector->element, $selector->ns) + && $this->matchAttributes($node, $selector->attributes) + && $this->matchId($node, $selector->id) + && $this->matchClasses($node, $selector->classes) + && $this->matchPseudoClasses($node, $selector->pseudoClasses) + && $this->matchPseudoElements($node, $selector->pseudoElements); + + $isNextRule = isset($selectors[++$index]); + // If there is another selector, we process that if there a match + // hasn't been found. + /* + if ($isNextRule && $selectors[$index]->combinator == SimpleSelector::anotherSelector) { + // We may need to re-initialize the match set for the next selector. + if (!$this->initialized) { + $this->initialMatch($selectors[$index]); + } + if (!$result) fprintf(STDOUT, "Element: %s, Next selector: %s\n", $node->tagName, $selectors[$index]); + return $result || $this->matchesSimpleSelector($node, $selectors, $index); + } + // If we have a match and we have a combinator, we need to + // recurse up the tree. 
+ else*/if ($isNextRule && $result) { + $result = $this->combine($node, $selectors, $index); + } + + return $result; + } + + /** + * Combine the next selector with the given match + * using the next combinator. + * + * If the next selector is combined with another + * selector, that will be evaluated too, and so on. + * So if this function returns TRUE, it means that all + * child selectors are also matches. + * + * @param DOMNode $node + * The DOMNode to test. + * @param array $selectors + * The array of simple selectors. + * @param int $index + * The index of the current selector. + * @return boolean + * TRUE if the next selector(s) match. + */ + public function combine($node, $selectors, $index) { + $selector = $selectors[$index]; + //$this->debug(implode(' ', $selectors)); + switch ($selector->combinator) { + case SimpleSelector::adjacent: + return $this->combineAdjacent($node, $selectors, $index); + case SimpleSelector::sibling: + return $this->combineSibling($node, $selectors, $index); + case SimpleSelector::directDescendant: + return $this->combineDirectDescendant($node, $selectors, $index); + case SimpleSelector::anyDescendant: + return $this->combineAnyDescendant($node, $selectors, $index); + case SimpleSelector::anotherSelector: + // fprintf(STDOUT, "Next selector: %s\n", $selectors[$index]); + return $this->matchesSimpleSelector($node, $selectors, $index); + ; + } + return FALSE; + } + + /** + * Process an Adjacent Sibling. + * + * The spec does not indicate whether Adjacent should ignore non-Element + * nodes, so we choose to ignore them. + * + * @param DOMNode $node + * A DOM Node. + * @param array $selectors + * The selectors array. + * @param int $index + * The current index to the operative simple selector in the selectors + * array. + * @return boolean + * TRUE if the combination matches, FALSE otherwise. 
+ */ + public function combineAdjacent($node, $selectors, $index) { + while (!empty($node->previousSibling)) { + $node = $node->previousSibling; + if ($node->nodeType == XML_ELEMENT_NODE) { + //$this->debug(sprintf('Testing %s against "%s"', $node->tagName, $selectors[$index])); + return $this->matchesSimpleSelector($node, $selectors, $index); + } + } + return FALSE; + } + + /** + * Check all siblings. + * + * According to the spec, this only tests elements LEFT of the provided + * node. + * + * @param DOMNode $node + * A DOM Node. + * @param array $selectors + * The selectors array. + * @param int $index + * The current index to the operative simple selector in the selectors + * array. + * @return boolean + * TRUE if the combination matches, FALSE otherwise. + */ + public function combineSibling($node, $selectors, $index) { + while (!empty($node->previousSibling)) { + $node = $node->previousSibling; + if ($node->nodeType == XML_ELEMENT_NODE && $this->matchesSimpleSelector($node, $selectors, $index)) { + return TRUE; + } + } + return FALSE; + } + + /** + * Handle a Direct Descendant combination. + * + * Check whether the given node is a rightly-related descendant + * of its parent node. + * + * @param DOMNode $node + * A DOM Node. + * @param array $selectors + * The selectors array. + * @param int $index + * The current index to the operative simple selector in the selectors + * array. + * @return boolean + * TRUE if the combination matches, FALSE otherwise. + */ + public function combineDirectDescendant($node, $selectors, $index) { + $parent = $node->parentNode; + if (empty($parent)) { + return FALSE; + } + return $this->matchesSimpleSelector($parent, $selectors, $index); + } + + /** + * Handle Any Descendant combinations. + * + * This checks to see if there are any matching routes from the + * selector beginning at the present node. + * + * @param DOMNode $node + * A DOM Node. + * @param array $selectors + * The selectors array. 
* @param int $index
*   The current index to the operative simple selector in the selectors
*   array.
* @return boolean
*   TRUE if the combination matches, FALSE otherwise.
*/
public function combineAnyDescendant($node, $selectors, $index) {
  while (!empty($node->parentNode)) {
    $node = $node->parentNode;

    // Skip ancestors that are not elements. This should really only
    // happen for the document node sitting above the root element.
    if ($node->nodeType != XML_ELEMENT_NODE) {
      continue;
    }

    if ($this->matchesSimpleSelector($node, $selectors, $index)) {
      return TRUE;
    }
  }

  // No ancestor matched. Return an explicit boolean, as documented,
  // instead of falling off the end of the function with NULL.
  return FALSE;
}

/**
 * Get the initial match set.
 *
 * This should only be executed when not working with an existing match
 * set. One of four strategies is chosen, in this order: by ID, by
 * namespace, by class (only when the element is a wildcard), and finally
 * by element name. Each strategy resets the selector criterion it has
 * consumed.
 *
 * @param object $selector
 *   The simple selector. Its element, id, ns and classes properties are
 *   read here.
 * @param \Traversable $matches
 *   The nodes to start searching from.
 * @return \SplObjectStorage
 *   The initial set of candidate nodes.
 */
protected function initialMatch($selector, $matches) {
  $element = $selector->element;

  // If no element is specified, we have to start with the
  // entire document.
  if ($element == NULL) {
    $element = '*';
  }

  // We try to do some optimization here to reduce the number of matches
  // to the bare minimum. This will reduce the subsequent number of
  // operations that must be performed in the query.

  // Experimental: ID queries use XPath to match, since this should give
  // us only a single matched element to work with.
  if (!empty($selector->id)) {
    $initialMatches = $this->initialMatchOnID($selector, $matches);
  }
  // If a namespace is set, find the namespace matches.
  elseif (!empty($selector->ns)) {
    $initialMatches = $this->initialMatchOnElementNS($selector, $matches);
  }
  // If the element is a wildcard, using class can substantially reduce
  // the number of elements that we start with.
  elseif ($element == '*' && !empty($selector->classes)) {
    $initialMatches = $this->initialMatchOnClasses($selector, $matches);
  }
  else {
    $initialMatches = $this->initialMatchOnElement($selector, $matches);
  }

  return $initialMatches;
}

/**
 * Shortcut for finding the initial match by ID.
 *
 * Each starting node is tested itself and then searched with an XPath
 * query for descendants carrying the ID. This keeps the number of
 * comparison operations done in PHP low.
 *
 * @param object $selector
 *   The simple selector. Its id property is read and then reset to NULL.
 * @param \Traversable $matches
 *   The nodes to start searching from.
 * @return \SplObjectStorage
 *   The nodes whose id attribute equals the selector's ID. Empty when no
 *   DOMDocument is set on this traverser.
 */
protected function initialMatchOnID($selector, $matches) {
  $id = $selector->id;
  $found = $this->newMatches();

  // Issue #145: DOMXPath will throw an exception if the DOM is
  // not set.
  if (!($this->dom instanceof \DOMDocument)) {
    return $found;
  }
  // NOTE(review): $id is interpolated into the XPath string literal
  // unescaped; an ID containing a single quote yields an invalid query.
  $baseQuery = ".//*[@id='{$id}']";
  $xpath = new \DOMXPath($this->dom);

  // Now we try to find any matching IDs.
  foreach ($matches as $node) {
    // Strict string comparison, so that numeric-looking IDs such as
    // "1e1" and "10" are not treated as equal. This agrees with the
    // exact string comparison done by the XPath query below.
    if ($node->getAttribute('id') === (string) $id) {
      $found->attach($node);
    }
    $nl = $this->initialXpathQuery($xpath, $node, $baseQuery);
    $this->attachNodeList($nl, $found);
  }
  // Unset the ID selector.
  $selector->id = NULL;
  return $found;
}

/**
 * Shortcut for setting the initial match by class.
 *
 * This shortcut should only be used when the initial element is '*' and
 * there are classes set. In any other case, the element finding algo is
 * faster and should be used instead.
 *
 * @param object $selector
 *   The simple selector. Its classes property is read and then reset to
 *   an empty array.
 * @param \Traversable $matches
 *   The nodes to start searching from.
 * @return \SplObjectStorage
 *   The nodes that carry every class named in the selector. Empty when
 *   no DOMDocument is set on this traverser.
 */
protected function initialMatchOnClasses($selector, $matches) {
  $found = $this->newMatches();

  // Issue #145: DOMXPath will throw an exception if the DOM is
  // not set.
  if (!($this->dom instanceof \DOMDocument)) {
    return $found;
  }
  $baseQuery = ".//*[@class]";
  $xpath = new \DOMXPath($this->dom);
  $wanted = $selector->classes;

  // A class attribute is a whitespace-separated list, so split on any
  // run of whitespace (spaces, tabs, newlines), exactly as matchClasses()
  // does. Splitting on a single space would miss "a\tb" or "a\nb". The
  // classes criterion is cleared below, so nothing re-checks it later.
  $hasAllClasses = function ($classAttr) use ($wanted) {
    $present = preg_split('/\s+/', $classAttr, -1, PREG_SPLIT_NO_EMPTY);
    return count(array_diff($wanted, $present)) == 0;
  };

  // Now we try to find any matching classes.
  foreach ($matches as $node) {
    // The starting node itself may match.
    if ($node->hasAttribute('class') && $hasAllClasses($node->getAttribute('class'))) {
      $found->attach($node);
    }

    // Then every descendant that has a class attribute at all.
    $nl = $this->initialXpathQuery($xpath, $node, $baseQuery);
    foreach ($nl as $candidate) {
      if ($hasAllClasses($candidate->getAttribute('class'))) {
        $found->attach($candidate);
      }
    }
  }

  // Unset the classes selector.
  $selector->classes = array();

  return $found;
}

/**
 * Internal xpath query.
 *
 * This is optimized for very specific use, and is not a general
 * purpose function.
 *
 * @param \DOMXPath $xpath
 *   The XPath evaluator to run the query with.
 * @param DOMNode $node
 *   The context node for the query.
 * @param string $query
 *   The query. Callers in this class pass a query beginning with './/'.
 * @return \DOMNodeList
 *   Whatever DOMXPath::query() returns for the query.
 */
private function initialXpathQuery($xpath, $node, $query) {
  // This works around a bug in which the document element
  // does not correctly search with the $baseQuery: the leading
  // character of the query is dropped for that node.
  if ($node->isSameNode($this->dom->documentElement)) {
    $query = substr($query, 1);
  }

  return $xpath->query($query, $node);
}

/**
 * Shortcut for setting the initial match by element name.
 *
 * @param object $selector
 *   The simple selector. Its element property is read (NULL is treated
 *   as '*') and then reset to NULL.
 * @param \Traversable $matches
 *   The nodes to start searching from.
 * @return \SplObjectStorage
 *   The starting nodes that match, plus everything returned by
 *   getElementsByTagName() on each starting node.
 */
protected function initialMatchOnElement($selector, $matches) {
  $element = $selector->element;
  if (is_null($element)) {
    $element = '*';
  }
  $found = $this->newMatches();
  foreach ($matches as $node) {
    // Capture the case where the initial element is the root element.
    if ($node->tagName == $element
      || ($element == '*' && $node->parentNode instanceof \DOMDocument)) {
      $found->attach($node);
    }
    $nl = $node->getElementsByTagName($element);
    $this->attachNodeList($nl, $found);
  }

  $selector->element = NULL;
  return $found;
}

/**
 * Get elements and filter by namespace.
 *
 * @param object $selector
 *   The simple selector. Its ns property is read; it is reset to NULL
 *   only when a filter was actually applied (that is, not for '*').
 * @param \Traversable $matches
 *   The nodes to start searching from.
 * @return \SplObjectStorage
 *   The element matches whose namespace URI equals the URI that the
 *   selector's namespace prefix resolves to on that node.
 */
protected function initialMatchOnElementNS($selector, $matches) {
  $ns = $selector->ns;

  $elements = $this->initialMatchOnElement($selector, $matches);

  // "any namespace" matches anything.
  if ($ns == '*') {
    return $elements;
  }

  // Loop through and make a list of items that need to be filtered
  // out, then filter them. This is required b/c ObjectStorage iterates
  // wrongly when an item is detached in an access loop.
  $detach = array();
  foreach ($elements as $node) {
    // This lookup must be done PER NODE.
    $nsuri = $node->lookupNamespaceURI($ns);
    if (empty($nsuri) || $node->namespaceURI != $nsuri) {
      $detach[] = $node;
    }
  }
  foreach ($detach as $rem) {
    $elements->detach($rem);
  }
  $selector->ns = NULL;
  return $elements;
}

/**
 * Checks to see if the DOMNode matches the given element selector.
 *
 * This handles the following cases:
 *
 * - element (foo)
 * - namespaced element (ns|foo)
 * - namespaced wildcard (ns|*)
 * - wildcard (* or *|*)
 *
 * @param DOMNode $node
 *   The node to test.
 * @param string $element
 *   The element name, or '*'. An empty value always matches.
 * @param string $ns
 *   The namespace prefix (not the URI), '*', or NULL for no constraint.
 * @return boolean
 *   TRUE if the node satisfies the element (and namespace) criteria.
 */
protected function matchElement($node, $element, $ns = NULL) {
  if (empty($element)) {
    return TRUE;
  }

  // Handle namespace.
  if (!empty($ns) && $ns != '*') {
    // Check whether we have a matching NS URI.
    $nsuri = $node->lookupNamespaceURI($ns);
    if (empty($nsuri) || $node->namespaceURI !== $nsuri) {
      return FALSE;
    }
  }

  // Compare local name to given element name.
  return $element == '*' || $node->localName == $element;
}

/**
 * Checks to see if the given DOMNode matches an "any element" (*).
 *
 * This does not handle namespaced wildcards.
 */
/*
protected function matchAnyElement($node) {
  $ancestors = $this->ancestors($node);

  return count($ancestors) > 0;
}
*/

/**
 * Get a list of ancestors to the present node.
 *
 * @param DOMNode $node
 *   The node to start from. It is not included in the result.
 * @return array
 *   The parent chain of the node, nearest ancestor first.
 */
protected function ancestors($node) {
  $buffer = array();
  $parent = $node;
  while (($parent = $parent->parentNode) !== NULL) {
    $buffer[] = $parent;
  }
  return $buffer;
}

/**
 * Check to see if DOMNode has all of the given attributes.
 *
 * This can handle namespaced attributes, including namespace
 * wildcards.
*/
protected function matchAttributes($node, $attributes) {
  if (empty($attributes)) {
    return TRUE;
  }

  foreach ($attributes as $attr) {
    $val = isset($attr['value']) ? $attr['value'] : NULL;

    // Namespaced attributes: the prefix must resolve on this node and
    // the attribute must exist in the resolved namespace.
    if (isset($attr['ns']) && $attr['ns'] != '*') {
      $nsuri = $node->lookupNamespaceURI($attr['ns']);
      if (empty($nsuri) || !$node->hasAttributeNS($nsuri, $attr['name'])) {
        return FALSE;
      }
      $matches = Util::matchesAttributeNS($node, $attr['name'], $nsuri, $val, $attr['op']);
    }
    elseif (isset($attr['ns']) && $attr['ns'] == '*' && $node->hasAttributes()) {
      // Cycle through all of the attributes in the node. Note that
      // these are DOMAttr objects.
      $matches = FALSE;
      $name = $attr['name'];
      foreach ($node->attributes as $attrNode) {
        if ($attrNode->localName == $name) {
          $nsuri = $attrNode->namespaceURI;
          // "Any namespace" is satisfied by the first attribute that
          // matches. Stop there, so that a later attribute with the
          // same local name in another namespace cannot overwrite a
          // successful match with a failure.
          if (Util::matchesAttributeNS($node, $name, $nsuri, $val, $attr['op'])) {
            $matches = TRUE;
            break;
          }
        }
      }
    }
    // No namespace.
    else {
      $matches = Util::matchesAttribute($node, $attr['name'], $val, $attr['op']);
    }

    if (!$matches) {
      return FALSE;
    }
  }
  return TRUE;
}

/**
 * Check that the given DOMNode has the given ID.
 *
 * @param DOMNode $node
 *   The node to test.
 * @param string $id
 *   The ID to look for. An empty value always matches.
 * @return boolean
 *   TRUE if no ID was requested or the node's id attribute equals it.
 */
protected function matchId($node, $id) {
  if (empty($id)) {
    return TRUE;
  }
  // Strict string comparison: with a loose comparison, numeric-looking
  // IDs such as "1e1" and "10" would be considered equal.
  return $node->hasAttribute('id') && $node->getAttribute('id') === (string) $id;
}

/**
 * Check that the given DOMNode has all of the given classes.
 *
 * @param DOMNode $node
 *   The node to test.
 * @param array $classes
 *   The class names that must all be present. An empty list always
 *   matches.
 * @return boolean
 *   TRUE if every requested class appears in the node's class attribute.
 */
protected function matchClasses($node, $classes) {
  if (empty($classes)) {
    return TRUE;
  }

  if (!$node->hasAttribute('class')) {
    return FALSE;
  }

  // The class attribute is split on any run of whitespace.
  $eleClasses = preg_split('/\s+/', $node->getAttribute('class'));
  if (empty($eleClasses)) {
    return FALSE;
  }

  // The intersection should match the given $classes: anything left
  // over is a requested class that the element does not have.
  $missing = array_diff($classes, array_intersect($classes, $eleClasses));

  return count($missing) == 0;
}

/**
 * Check that the given DOMNode satisfies all of the given pseudo-classes.
 *
 * Every pseudo-class is handed to the pseudo-class handler, even after
 * one has already failed, so any exception the handler raises for a
 * later pseudo-class is still raised.
 *
 * @param DOMNode $node
 *   The node to test.
 * @param array $pseudoClasses
 *   A list of arrays, each with a 'name' key and an optional 'value' key.
 * @return boolean
 *   TRUE if the handler accepted every pseudo-class (or none were given).
 */
protected function matchPseudoClasses($node, $pseudoClasses) {
  $ret = TRUE;
  foreach ($pseudoClasses as $pseudoClass) {
    $name = $pseudoClass['name'];
    // Avoid E_STRICT violation.
    $value = isset($pseudoClass['value']) ? $pseudoClass['value'] : NULL;
    // Logical (not bitwise) AND keeps the result a real boolean. The
    // handler call is the left operand, so it always runs.
    $ret = $this->psHandler->elementMatches($name, $node, $this->scopeNode, $value) && $ret;
  }
  return $ret;
}

/**
 * Test whether the given node matches the pseudoElements.
 *
 * If any pseudo-elements are passed, this will test to see
 * if conditions obtain that would allow the pseudo-element
 * to be created. This does not modify the match in any way.
 *
 * @param DOMNode $node
 *   The node to test.
 * @param array $pseudoElements
 *   The pseudo-element names. An empty list always matches.
 * @return boolean
 *   For first-line, first-letter, before and after: TRUE if the node has
 *   text content. FALSE if no recognised pseudo-element is found.
 * @throws \QueryPath\CSS\NotImplementedException
 *   For the 'selection' pseudo-element.
 */
protected function matchPseudoElements($node, $pseudoElements) {
  if (empty($pseudoElements)) {
    return TRUE;
  }

  foreach ($pseudoElements as $pse) {
    switch ($pse) {
      case 'first-line':
      case 'first-letter':
      case 'before':
      case 'after':
        return strlen($node->textContent) > 0;
      case 'selection':
        // Report the pseudo-element actually encountered.
        throw new \QueryPath\CSS\NotImplementedException("::$pse is not implemented.");
    }
  }

  // Nothing recognised: an explicit FALSE instead of an implicit NULL.
  return FALSE;
}

/**
 * Create a new, empty match set.
 *
 * @return \SplObjectStorage
 *   An empty object storage.
 */
protected function newMatches() {
  return new \SplObjectStorage();
}

/**
 * Get the internal match set.
 *
 * Internal utility function. Delegates to matches(), which is defined
 * outside this part of the file.
 */
protected function getMatches() {
  return $this->matches();
}

/**
 * Set the internal match set.
 *
 * Internal utility function.
 *
 * @param mixed $matches
 *   The new match set; stored as-is.
 */
protected function setMatches($matches) {
  $this->matches = $matches;
}

/**
 * Attach all nodes in a node list to the given \SplObjectStorage.
+ */ + public function attachNodeList(\DOMNodeList $nodeList, \SplObjectStorage $splos) { + foreach ($nodeList as $item) $splos->attach($item); + } + + public function getDocument() { + return $this->dom; + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php b/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php new file mode 100644 index 0000000..0bcaf79 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/DOMTraverser/PseudoClass.php @@ -0,0 +1,421 @@ +lang($node, $value); + case 'any-link': + return Util::matchesAttribute($node, 'href') + || Util::matchesAttribute($node, 'src') + || Util::matchesAttribute($node, 'link'); + case 'link': + return Util::matchesAttribute($node, 'href'); + case 'local-link': + return $this->isLocalLink($node); + case 'root': + return $node->isSameNode($node->ownerDocument->documentElement); + + // CSS 4 declares the :scope pseudo-class, which describes what was + // the :x-root QueryPath extension. + case 'x-root': + case 'x-reset': + case 'scope': + return $node->isSameNode($scope); + // NON-STANDARD extensions for simple support of even and odd. These + // are supported by jQuery, FF, and other user agents. + case 'even': + return $this->isNthChild($node, 'even'); + case 'odd': + return $this->isNthChild($node, 'odd'); + case 'nth-child': + return $this->isNthChild($node, $value); + case 'nth-last-child': + return $this->isNthChild($node, $value, TRUE); + case 'nth-of-type': + return $this->isNthChild($node, $value, FALSE, TRUE); + case 'nth-last-of-type': + return $this->isNthChild($node, $value, TRUE, TRUE); + case 'first-of-type': + return $this->isFirstOfType($node); + case 'last-of-type': + return $this->isLastOfType($node); + case 'only-of-type': + return $this->isFirstOfType($node) && $this->isLastOfType($node); + + // Additional pseudo-classes defined in jQuery: + case 'lt': + // I'm treating this as "less than or equal to". 
+ $rule = sprintf('-n + %d', (int) $value); + // $rule = '-n+15'; + return $this->isNthChild($node, $rule); + case 'gt': + // I'm treating this as "greater than" + // return $this->nodePositionFromEnd($node) > (int) $value; + return $this->nodePositionFromStart($node) > (int) $value; + case 'nth': + case 'eq': + $rule = (int)$value; + return $this->isNthChild($node, $rule); + case 'first': + return $this->isNthChild($node, 1); + case 'first-child': + return $this->isFirst($node); + case 'last': + case 'last-child': + return $this->isLast($node); + case 'only-child': + return $this->isFirst($node) && $this->isLast($node); + case 'empty': + return $this->isEmpty($node); + case 'parent': + return !$this->isEmpty($node); + + case 'enabled': + case 'disabled': + case 'checked': + return Util::matchesAttribute($node, $name); + case 'text': + case 'radio': + case 'checkbox': + case 'file': + case 'password': + case 'submit': + case 'image': + case 'reset': + case 'button': + return Util::matchesAttribute($node, 'type', $name); + + case 'header': + return $this->header($node); + case 'has': + case 'matches': + return $this->has($node, $value); + break; + case 'not': + if (empty($value)) { + throw new ParseException(":not() requires a value."); + } + return $this->isNot($node, $value); + // Contains == text matches. + // In QP 2.1, this was changed. + case 'contains': + return $this->contains($node, $value); + // Since QP 2.1 + case 'contains-exactly': + return $this->containsExactly($node, $value); + default: + throw new \QueryPath\CSS\ParseException("Unknown Pseudo-Class: " . $name); + } + $this->findAnyElement = FALSE; + } + + /** + * Pseudo-class handler for :lang + * + * Note that this does not implement the spec in its entirety because we do + * not presume to "know the language" of the document. If anyone is interested + * in making this more intelligent, please do so. 
*/
protected function lang($node, $value) {
  // TODO: Only an explicitly declared lang attribute is considered. The
  // spec seems to say that the language should be inherited from the
  // parent, but that is not resolved here.
  $hasHyphen = strpos($value, '-') !== FALSE;
  $operator = $hasHyphen ? EventHandler::isExactly : EventHandler::containsWithHyphen;

  foreach ($node->attributes as $attrNode) {
    if ($attrNode->localName != 'lang') {
      continue;
    }

    // The first attribute whose local name is "lang" decides the result.
    // A node name equal to the local name means there is no prefix.
    if ($attrNode->nodeName == $attrNode->localName) {
      return Util::matchesAttribute($node, 'lang', $value, $operator);
    }
    return Util::matchesAttributeNS($node, 'lang', $attrNode->namespaceURI, $value, $operator);
  }

  // No lang attribute at all.
  return FALSE;
}

/**
 * Implements the jQuery ':header' pseudo-class.
 *
 * @param DOMNode $node
 *   The element to test.
 * @return boolean
 *   TRUE when the tag name is 'h' followed by a single digit 1-9,
 *   compared case-insensitively.
 */
protected function header($node) {
  $hits = preg_match('/^h[1-9]$/i', $node->tagName);
  return $hits == 1;
}

/**
 * Implements the ':empty' pseudo-class.
 *
 * Only element and text children count as content. Processing
 * instructions, comments and CDATA sections are ignored.
 *
 * @param DOMNode $node
 *   The element to test.
 * @return boolean
 *   TRUE when the node has no element or text child nodes.
 */
protected function isEmpty($node) {
  foreach ($node->childNodes as $child) {
    switch ($child->nodeType) {
      case XML_ELEMENT_NODE:
      case XML_TEXT_NODE:
        // One counted child is enough to decide.
        return FALSE;
    }
  }
  return TRUE;
}

/**
 * Implements the jQuery ':first' pseudo-class.
 *
 * @todo
 *   This can be replaced by isNthChild().
 *
 * @param DOMNode $node
 *   The element to test.
 * @return boolean
 *   TRUE when no element precedes the node among its siblings.
 */
protected function isFirst($node) {
  for ($prev = $node->previousSibling; isset($prev); $prev = $prev->previousSibling) {
    if ($prev->nodeType == XML_ELEMENT_NODE) {
      return FALSE;
    }
  }
  return TRUE;
}

/**
 * Fast version of first-of-type.
*/
protected function isFirstOfType($node) {
  $tagName = $node->tagName;
  for ($prev = $node->previousSibling; isset($prev); $prev = $prev->previousSibling) {
    // An earlier element with the same tag name disqualifies the node.
    if ($prev->nodeType == XML_ELEMENT_NODE && $prev->tagName == $tagName) {
      return FALSE;
    }
  }
  return TRUE;
}

/**
 * Fast check for jQuery ':last'.
 *
 * @param DOMNode $node
 *   The element to test.
 * @return boolean
 *   TRUE when no element follows the node among its siblings.
 */
protected function isLast($node) {
  for ($next = $node->nextSibling; isset($next); $next = $next->nextSibling) {
    if ($next->nodeType == XML_ELEMENT_NODE) {
      return FALSE;
    }
  }
  return TRUE;
}

/**
 * Implements last-of-type.
 *
 * @param DOMNode $node
 *   The element to test.
 * @return boolean
 *   TRUE when no later sibling element has the same tag name.
 */
protected function isLastOfType($node) {
  $tagName = $node->tagName;
  for ($next = $node->nextSibling; isset($next); $next = $next->nextSibling) {
    if ($next->nodeType == XML_ELEMENT_NODE && $next->tagName == $tagName) {
      return FALSE;
    }
  }
  return TRUE;
}

/**
 * Implements ':contains()' as the original spec called for.
 *
 * This is an INEXACT match: a case-insensitive substring search of the
 * node's text content, after surrounding quotes are removed from the
 * value.
 *
 * @param DOMNode $node
 *   The node whose text content is searched.
 * @param string $value
 *   The text to look for, optionally quoted.
 * @return boolean
 *   TRUE when the text content contains the value.
 */
protected function contains($node, $value) {
  $haystack = $node->textContent;
  $needle = Util::removeQuotes($value);
  if (!isset($haystack)) {
    return FALSE;
  }
  return stripos($haystack, $needle) !== FALSE;
}

/**
 * Implements the QueryPath ':contains-exactly' pseudo-class.
 *
 * This is an EXACT match: the whole text content must equal the value,
 * after surrounding quotes are removed from the value.
 *
 * @param DOMNode $node
 *   The node whose text content is compared.
 * @param string $value
 *   The expected text, optionally quoted.
 * @return boolean
 *   TRUE when the text content equals the value.
 */
protected function containsExactly($node, $value) {
  $content = $node->textContent;
  $expected = Util::removeQuotes($value);
  if (!isset($content)) {
    return FALSE;
  }
  return $content == $expected;
}

/**
 * Implements the ':has' pseudo-class.
 *
 * A fresh traverser is created over a match set holding only the given
 * node, and the selector is run through its find() method.
 *
 * @param DOMNode $node
 *   The node to search from.
 * @param string $selector
 *   The selector to run.
 * @return boolean
 *   TRUE when the traverser reports at least one match.
 */
protected function has($node, $selector) {
  $scope = new \SPLObjectStorage();
  $scope->attach($node);
  $traverser = new \QueryPath\CSS\DOMTraverser($scope, TRUE);
  $found = $traverser->find($selector);
  return count($found->matches()) > 0;
}

/**
 * Implements the ':not' pseudo-class.
 *
 * @param DOMNode $node
 *   The node to test.
 * @param string $selector
 *   The selector that must not match.
 * @return boolean
 *   The negation of has() for the same node and selector.
 */
protected function isNot($node, $selector) {
  $matched = $this->has($node, $selector);
  return !$matched;
}

/**
 * Get the relative position of a node in its sibling set.
+ */ + protected function nodePositionFromStart($node, $byType = FALSE) { + $i = 1; + $tag = $node->tagName; + while (isset($node->previousSibling)) { + $node = $node->previousSibling; + if ($node->nodeType == XML_ELEMENT_NODE && (!$byType || $node->tagName == $tag)) { + ++$i; + } + } + return $i; + } + /** + * Get the relative position of a node in its sibling set. + */ + protected function nodePositionFromEnd($node, $byType = FALSE) { + $i = 1; + $tag = $node->tagName; + while (isset($node->nextSibling)) { + $node = $node->nextSibling; + if ($node->nodeType == XML_ELEMENT_NODE && (!$byType || $node->tagName == $tag)) { + ++$i; + } + } + return $i; + } + + /** + * Provides functionality for all "An+B" rules. + * Provides nth-child and also the functionality required for: + * + *- nth-last-child + *- even + *- odd + *- first + *- last + *- eq + *- nth + *- nth-of-type + *- first-of-type + *- last-of-type + *- nth-last-of-type + * + * See also QueryPath::CSS::DOMTraverser::Util::parseAnB(). + */ + protected function isNthChild($node, $value, $reverse = FALSE, $byType = FALSE) { + list($groupSize, $elementInGroup) = Util::parseAnB($value); + $parent = $node->parentNode; + if (empty($parent) + || ($groupSize == 0 && $elementInGroup == 0) + || ($groupSize > 0 && $elementInGroup > $groupSize) + ) { + return FALSE; + } + + // First we need to find the position of $node in other elements. 
+ if ($reverse) { + $pos = $this->nodePositionFromEnd($node, $byType); + } + else { + $pos = $this->nodePositionFromStart($node, $byType); + } + + // If group size is 0, we just check to see if this + // is the nth element: + if ($groupSize == 0) { + return $pos == $elementInGroup; + } + + // Next, we normalize $elementInGroup + if ($elementInGroup < 0) { + $elementInGroup = $groupSize + $elementInGroup; + } + + + $prod = ($pos - $elementInGroup) / $groupSize; + // fprintf(STDOUT, "%d n + %d on %d is %3.5f\n", $groupSize, $elementInGroup, $pos, $prod); + + return is_int($prod) && $prod >= 0; + } + + protected function isLocalLink($node) { + if (!$node->hasAttribute('href')) { + return FALSE; + } + $url = $node->getAttribute('href'); + $scheme = parse_url($url, PHP_URL_SCHEME); + return empty($scheme) || $scheme == 'file'; + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php b/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php new file mode 100644 index 0000000..ec01d8f --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/DOMTraverser/Util.php @@ -0,0 +1,139 @@ +hasAttribute($name)) { + return FALSE; + } + + if (is_null($value)) { + return TRUE; + } + + return self::matchesAttributeValue($value, $node->getAttribute($name), $operation); + } + /** + * Check whether the given DOMElement has the given namespaced attribute. + */ + public static function matchesAttributeNS($node, $name, $nsuri, $value = NULL, $operation = EventHandler::isExactly) { + if (!$node->hasAttributeNS($nsuri, $name)) { + return FALSE; + } + + if (is_null($value)) { + return TRUE; + } + + return self::matchesAttributeValue($value, $node->getAttributeNS($nsuri, $name), $operation); + } + + /** + * Check for attr value matches based on an operation. 
+ */ + public static function matchesAttributeValue($needle, $haystack, $operation) { + + if (strlen($haystack) < strlen($needle)) return FALSE; + + // According to the spec: + // "The case-sensitivity of attribute names in selectors depends on the document language." + // (6.3.2) + // To which I say, "huh?". We assume case sensitivity. + switch ($operation) { + case EventHandler::isExactly: + return $needle == $haystack; + case EventHandler::containsWithSpace: + // XXX: This needs testing! + return preg_match('/\b/', $haystack) == 1; + //return in_array($needle, explode(' ', $haystack)); + case EventHandler::containsWithHyphen: + return in_array($needle, explode('-', $haystack)); + case EventHandler::containsInString: + return strpos($haystack, $needle) !== FALSE; + case EventHandler::beginsWith: + return strpos($haystack, $needle) === 0; + case EventHandler::endsWith: + //return strrpos($haystack, $needle) === strlen($needle) - 1; + return preg_match('/' . $needle . '$/', $haystack) == 1; + } + return FALSE; // Shouldn't be able to get here. + } + + /** + * Remove leading and trailing quotes. + */ + public static function removeQuotes($str) { + $f = substr($str, 0, 1); + $l = substr($str, -1); + if ($f === $l && ($f == '"' || $f == "'")) { + $str = substr($str, 1, -1); + } + return $str; + } + + /** + * Parse an an+b rule for CSS pseudo-classes. + * + * Invalid rules return `array(0, 0)`. This is per the spec. + * + * @param $rule + * Some rule in the an+b format. + * @retval array + * `array($aVal, $bVal)` of the two values. + */ + public static function parseAnB($rule) { + if ($rule == 'even') { + return array(2, 0); + } + elseif ($rule == 'odd') { + return array(2, 1); + } + elseif ($rule == 'n') { + return array(1, 0); + } + elseif (is_numeric($rule)) { + return array(0, (int)$rule); + } + + $regex = '/^\s*([+\-]?[0-9]*)n\s*([+\-]?)\s*([0-9]*)\s*$/'; + $matches = array(); + $res = preg_match($regex, $rule, $matches); + + // If it doesn't parse, return 0, 0. 
+ if (!$res) { + return array(0, 0); + } + + $aVal = isset($matches[1]) ? $matches[1] : 1; + if ($aVal == '-') { + $aVal = -1; + } + else { + $aVal = (int) $aVal; + } + + $bVal = 0; + if (isset($matches[3])) { + $bVal = (int) $matches[3]; + if (isset($matches[2]) && $matches[2] == '-') { + $bVal *= -1; + } + } + return array($aVal, $bVal); + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/EventHandler.php b/lib/querypath/src/QueryPath/CSS/EventHandler.php new file mode 100644 index 0000000..a003a0a --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/EventHandler.php @@ -0,0 +1,171 @@ + + * @license MIT + */ +namespace QueryPath\CSS; + +/** @addtogroup querypath_css CSS Parsing + * QueryPath includes a CSS 3 Selector parser. + * + * + * Typically the parser is not accessed directly. Most developers will use it indirectly from + * qp(), htmlqp(), or one of the methods on a QueryPath object. + * + * This parser is modular and is not tied to QueryPath, so you can use it in your + * own (non-QueryPath) projects if you wish. To dive in, start with EventHandler, the + * event interface that works like a SAX API for CSS selectors. If you want to check out + * the details, check out the parser (QueryPath::CSS::Parser), scanner + * (QueryPath::CSS::Scanner), and token list (QueryPath::CSS::Token). + */ + +/** + * An event handler for handling CSS 3 Selector parsing. + * + * This provides a standard interface for CSS 3 Selector event handling. As the + * parser parses a selector, it will fire events. Implementations of EventHandler + * can then handle the events. + * + * This library is inspired by the SAX2 API for parsing XML. Each component of a + * selector fires an event, passing the necessary data on to the event handler. + * + * @ingroup querypath_css + */ +interface EventHandler { + /** The is-exactly (=) operator. */ + const isExactly = 0; // = + /** The contains-with-space operator (~=). 
*/ + const containsWithSpace = 1; // ~= + /** The contains-with-hyphen operator (!=). */ + const containsWithHyphen = 2; // |= + /** The contains-in-string operator (*=). */ + const containsInString = 3; // *= + /** The begins-with operator (^=). */ + const beginsWith = 4; // ^= + /** The ends-with operator ($=). */ + const endsWith = 5; // $= + /** The any-element operator (*). */ + const anyElement = '*'; + + /** + * This event is fired when a CSS ID is encountered. + * An ID begins with an octothorp: #name. + * + * @param string $id + * The ID passed in. + */ + public function elementID($id); // #name + /** + * Handle an element name. + * Example: name + * @param string $name + * The name of the element. + */ + public function element($name); // name + /** + * Handle a namespaced element name. + * example: namespace|name + * @param string $name + * The tag name. + * @param string $namespace + * The namespace identifier (Not the URI) + */ + public function elementNS($name, $namespace = NULL); + /** + * Handle an any-element (*) operator. + * Example: * + */ + public function anyElement(); // * + /** + * Handle an any-element operator that is constrained to a namespace. + * Example: ns|* + * @param string $ns + * The namespace identifier (not the URI). + */ + public function anyElementInNS($ns); // ns|* + /** + * Handle a CSS class selector. + * Example: .name + * @param string $name + * The name of the class. + */ + public function elementClass($name); // .name + /** + * Handle an attribute selector. + * Example: [name=attr] + * Example: [name~=attr] + * @param string $name + * The attribute name. + * @param string $value + * The value of the attribute, if given. + * @param int $operation + * The operation to be used for matching. See {@link EventHandler} + * constants for a list of supported operations. 
+ */ + public function attribute($name, $value = NULL, $operation = EventHandler::isExactly); // [name=attr] + /** + * Handle an attribute selector bound to a specific namespace. + * Example: [ns|name=attr] + * Example: [ns|name~=attr] + * @param string $name + * The attribute name. + * @param string $ns + * The namespace identifier (not the URI). + * @param string $value + * The value of the attribute, if given. + * @param int $operation + * The operation to be used for matching. See {@link EventHandler} + * constants for a list of supported operations. + */ + public function attributeNS($name, $ns, $value = NULL, $operation = EventHandler::isExactly); + /** + * Handle a pseudo-class. + * Example: :name(value) + * @param string $name + * The pseudo-class name. + * @param string $value + * The value, if one is found. + */ + public function pseudoClass($name, $value = NULL); //:name(value) + /** + * Handle a pseudo-element. + * Example: ::name + * @param string $name + * The pseudo-element name. + */ + public function pseudoElement($name); // ::name + /** + * Handle a direct descendant combinator. + * Example: > + */ + public function directDescendant(); // > + /** + * Handle a adjacent combinator. + * Example: + + */ + public function adjacent(); // + + /** + * Handle an another-selector combinator. + * Example: , + */ + public function anotherSelector(); // , + /** + * Handle a sibling combinator. + * Example: ~ + */ + public function sibling(); // ~ combinator + /** + * Handle an any-descendant combinator. + * Example: ' ' + */ + public function anyDescendant(); // ' ' (space) operator. +} diff --git a/lib/querypath/src/QueryPath/CSS/InputStream.php b/lib/querypath/src/QueryPath/CSS/InputStream.php new file mode 100644 index 0000000..2967845 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/InputStream.php @@ -0,0 +1,57 @@ +stream = str_split($string); + } + /** + * Look ahead one character. 
+ * + * @return char + * Returns the next character, but does not remove it from + * the stream. + */ + function peek() { + return $this->stream[0]; + } + /** + * Get the next unconsumed character in the stream. + * This will remove that character from the front of the + * stream and return it. + */ + function consume() { + $ret = array_shift($this->stream); + if (!empty($ret)) { + $this->position++; + } + return $ret; + } + /** + * Check if the stream is empty. + * @return boolean + * Returns TRUE when the stream is empty, FALSE otherwise. + */ + function isEmpty() { + return count($this->stream) == 0; + } +} diff --git a/lib/querypath/src/QueryPath/CSS/NotImplementedException.php b/lib/querypath/src/QueryPath/CSS/NotImplementedException.php new file mode 100644 index 0000000..6705f30 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/NotImplementedException.php @@ -0,0 +1,15 @@ +originalString = $string; + $is = new InputStream($string); + $this->scanner = new Scanner($is); + $this->handler = $handler; + } + + /** + * Parse the selector. + * + * This begins an event-based parsing process that will + * fire events as the selector is handled. A EventHandler + * implementation will be responsible for handling the events. + * @throws ParseException + */ + public function parse() { + + $this->scanner->nextToken(); + while ($this->scanner->token !== FALSE) { + // Primitive recursion detection. + $position = $this->scanner->position(); + + if ($this->DEBUG) { + print "PARSE " . $this->scanner->token. "\n"; + } + $this->selector(); + + $finalPosition = $this->scanner->position(); + + if ($this->scanner->token !== FALSE && $finalPosition == $position) { + // If we get here, then the scanner did not pop a single character + // off of the input stream during a full run of the parser, which + // means that the current input does not match any recognizable + // pattern. 
+ throw new ParseException('CSS selector is not well formed.'); + } + + } + + } + + /** + * A restricted parser that can only parse simple selectors. + * The pseudoClass handler for this parser will throw an + * exception if it encounters a pseudo-element or the + * negation pseudo-class. + * + * @deprecated This is not used anywhere in QueryPath and + * may be removed. + *//* + public function parseSimpleSelector() { + while ($this->scanner->token !== FALSE) { + if ($this->DEBUG) print "SIMPLE SELECTOR\n"; + $this->allElements(); + $this->elementName(); + $this->elementClass(); + $this->elementID(); + $this->pseudoClass(TRUE); // Operate in restricted mode. + $this->attribute(); + + // TODO: Need to add failure conditions here. + } + }*/ + + /** + * Handle an entire CSS selector. + */ + private function selector() { + if ($this->DEBUG) print "SELECTOR{$this->scanner->position()}\n"; + $this->consumeWhitespace(); // Remove leading whitespace + $this->simpleSelectors(); + $this->combinator(); + } + + /** + * Consume whitespace and return a count of the number of whitespace consumed. + */ + private function consumeWhitespace() { + if ($this->DEBUG) print "CONSUME WHITESPACE\n"; + $white = 0; + while ($this->scanner->token == Token::white) { + $this->scanner->nextToken(); + ++$white; + } + return $white; + } + + /** + * Handle one of the five combinators: '>', '+', ' ', '~', and ','. + * This will call the appropriate event handlers. + * @see EventHandler::directDescendant(), + * @see EventHandler::adjacent(), + * @see EventHandler::anyDescendant(), + * @see EventHandler::anotherSelector(). + */ + private function combinator() { + if ($this->DEBUG) print "COMBINATOR\n"; + /* + * Problem: ' ' and ' > ' are both valid combinators. + * So we have to track whitespace consumption to see + * if we are hitting the ' ' combinator or if the + * selector just has whitespace padding another combinator. 
+ */ + + // Flag to indicate that post-checks need doing + $inCombinator = FALSE; + $white = $this->consumeWhitespace(); + $t = $this->scanner->token; + + if ($t == Token::rangle) { + $this->handler->directDescendant(); + $this->scanner->nextToken(); + $inCombinator = TRUE; + //$this->simpleSelectors(); + } + elseif ($t == Token::plus) { + $this->handler->adjacent(); + $this->scanner->nextToken(); + $inCombinator = TRUE; + //$this->simpleSelectors(); + } + elseif ($t == Token::comma) { + $this->handler->anotherSelector(); + $this->scanner->nextToken(); + $inCombinator = TRUE; + //$this->scanner->selectors(); + } + elseif ($t == Token::tilde) { + $this->handler->sibling(); + $this->scanner->nextToken(); + $inCombinator = TRUE; + } + + // Check that we don't get two combinators in a row. + if ($inCombinator) { + $white = 0; + if ($this->DEBUG) print "COMBINATOR: " . Token::name($t) . "\n"; + $this->consumeWhitespace(); + if ($this->isCombinator($this->scanner->token)) { + throw new ParseException("Illegal combinator: Cannot have two combinators in sequence."); + } + } + // Check to see if we have whitespace combinator: + elseif ($white > 0) { + if ($this->DEBUG) print "COMBINATOR: any descendant\n"; + $inCombinator = TRUE; + $this->handler->anyDescendant(); + } + else { + if ($this->DEBUG) print "COMBINATOR: no combinator found.\n"; + } + } + + /** + * Check if the token is a combinator. + */ + private function isCombinator($tok) { + $combinators = array(Token::plus, Token::rangle, Token::comma, Token::tilde); + return in_array($tok, $combinators); + } + + /** + * Handle a simple selector. + */ + private function simpleSelectors() { + if ($this->DEBUG) print "SIMPLE SELECTOR\n"; + $this->allElements(); + $this->elementName(); + $this->elementClass(); + $this->elementID(); + $this->pseudoClass(); + $this->attribute(); + } + + /** + * Handles CSS ID selectors. + * This will call EventHandler::elementID(). 
+ */ + private function elementID() { + if ($this->DEBUG) print "ELEMENT ID\n"; + if ($this->scanner->token == Token::octo) { + $this->scanner->nextToken(); + if ($this->scanner->token !== Token::char) { + throw new ParseException("Expected string after #"); + } + $id = $this->scanner->getNameString(); + $this->handler->elementID($id); + } + } + + /** + * Handles CSS class selectors. + * This will call the EventHandler::elementClass() method. + */ + private function elementClass() { + if ($this->DEBUG) print "ELEMENT CLASS\n"; + if ($this->scanner->token == Token::dot) { + $this->scanner->nextToken(); + $this->consumeWhitespace(); // We're very fault tolerent. This should prob through error. + $cssClass = $this->scanner->getNameString(); + $this->handler->elementClass($cssClass); + } + } + + /** + * Handle a pseudo-class and pseudo-element. + * + * CSS 3 selectors support separate pseudo-elements, using :: instead + * of : for separator. This is now supported, and calls the pseudoElement + * handler, EventHandler::pseudoElement(). + * + * This will call EventHandler::pseudoClass() when a + * pseudo-class is parsed. + */ + private function pseudoClass($restricted = FALSE) { + if ($this->DEBUG) print "PSEUDO-CLASS\n"; + if ($this->scanner->token == Token::colon) { + + // Check for CSS 3 pseudo element: + $isPseudoElement = FALSE; + if ($this->scanner->nextToken() === Token::colon) { + $isPseudoElement = TRUE; + $this->scanner->nextToken(); + } + + $name = $this->scanner->getNameString(); + if ($restricted && $name == 'not') { + throw new ParseException("The 'not' pseudo-class is illegal in this context."); + } + + $value = NULL; + if ($this->scanner->token == Token::lparen) { + if ($isPseudoElement) { + throw new ParseException("Illegal left paren. Pseudo-Element cannot have arguments."); + } + $value = $this->pseudoClassValue(); + } + + // FIXME: This should throw errors when pseudo element has values. 
+ if ($isPseudoElement) { + if ($restricted) { + throw new ParseException("Pseudo-Elements are illegal in this context."); + } + $this->handler->pseudoElement($name); + $this->consumeWhitespace(); + + // Per the spec, pseudo-elements must be the last items in a selector, so we + // check to make sure that we are either at the end of the stream or that a + // new selector is starting. Only one pseudo-element is allowed per selector. + if ($this->scanner->token !== FALSE && $this->scanner->token !== Token::comma) { + throw new ParseException("A Pseudo-Element must be the last item in a selector."); + } + } + else { + $this->handler->pseudoClass($name, $value); + } + } + } + + /** + * Get the value of a pseudo-classes. + * + * @return string + * Returns the value found from a pseudo-class. + * + * @todo Pseudoclasses can be passed pseudo-elements and + * other pseudo-classes as values, which means :pseudo(::pseudo) + * is legal. + */ + private function pseudoClassValue() { + if ($this->scanner->token == Token::lparen) { + $buf = ''; + + // For now, just leave pseudoClass value vague. + /* + // We have to peek to see if next char is a colon because + // pseudo-classes and pseudo-elements are legal strings here. 
+ print $this->scanner->peek(); + if ($this->scanner->peek() == ':') { + print "Is pseudo\n"; + $this->scanner->nextToken(); + + // Pseudo class + if ($this->scanner->token == Token::colon) { + $buf .= ':'; + $this->scanner->nextToken(); + // Pseudo element + if ($this->scanner->token == Token::colon) { + $buf .= ':'; + $this->scanner->nextToken(); + } + // Ident + $buf .= $this->scanner->getNameString(); + } + } + else { + print "fetching string.\n"; + $buf .= $this->scanner->getQuotedString(); + if ($this->scanner->token != Token::rparen) { + $this->throwError(Token::rparen, $this->scanner->token); + } + $this->scanner->nextToken(); + } + return $buf; + */ + //$buf .= $this->scanner->getQuotedString(); + $buf .= $this->scanner->getPseudoClassString(); + return $buf; + } + } + + /** + * Handle element names. + * This will call the EventHandler::elementName(). + * + * This handles: + * + * name (EventHandler::element()) + * |name (EventHandler::element()) + * ns|name (EventHandler::elementNS()) + * ns|* (EventHandler::elementNS()) + * + */ + private function elementName() { + if ($this->DEBUG) print "ELEMENT NAME\n"; + if ($this->scanner->token === Token::pipe) { + // We have '|name', which is equiv to 'name' + $this->scanner->nextToken(); + $this->consumeWhitespace(); + $elementName = $this->scanner->getNameString(); + $this->handler->element($elementName); + } + elseif ($this->scanner->token === Token::char) { + $elementName = $this->scanner->getNameString(); + if ($this->scanner->token == Token::pipe) { + // Get ns|name + $elementNS = $elementName; + $this->scanner->nextToken(); + $this->consumeWhitespace(); + if ($this->scanner->token === Token::star) { + // We have ns|* + $this->handler->anyElementInNS($elementNS); + $this->scanner->nextToken(); + } + elseif ($this->scanner->token !== Token::char) { + $this->throwError(Token::char, $this->scanner->token); + } + else { + $elementName = $this->scanner->getNameString(); + // We have ns|name + 
$this->handler->elementNS($elementName, $elementNS); + } + + } + else { + $this->handler->element($elementName); + } + } + } + + /** + * Check for all elements designators. Due to the new CSS 3 namespace + * support, this is slightly more complicated, now, as it handles + * the *|name and *|* cases as well as *. + * + * Calls EventHandler::anyElement() or EventHandler::elementName(). + */ + private function allElements() { + if ($this->scanner->token === Token::star) { + $this->scanner->nextToken(); + if ($this->scanner->token === Token::pipe) { + $this->scanner->nextToken(); + if ($this->scanner->token === Token::star) { + // We got *|*. According to spec, this requires + // that the element has a namespace, so we pass it on + // to the handler: + $this->scanner->nextToken(); + $this->handler->anyElementInNS('*'); + } + else { + // We got *|name, which means the name MUST be in a namespce, + // so we pass this off to elementNameNS(). + $name = $this->scanner->getNameString(); + $this->handler->elementNS($name, '*'); + } + } + else { + $this->handler->anyElement(); + } + } + } + + /** + * Handler an attribute. + * An attribute can be in one of two forms: + * [attrName] + * or + * [attrName="AttrValue"] + * + * This may call the following event handlers: EventHandler::attribute(). + */ + private function attribute() { + if($this->scanner->token == Token::lsquare) { + $attrVal = $op = $ns = NULL; + + $this->scanner->nextToken(); + $this->consumeWhitespace(); + + if ($this->scanner->token === Token::at) { + if ($this->strict) { + throw new ParseException('The @ is illegal in attributes.'); + } + else { + $this->scanner->nextToken(); + $this->consumeWhitespace(); + } + } + + if ($this->scanner->token === Token::star) { + // Global namespace... requires that attr be prefixed, + // so we pass this on to a namespace handler. + $ns = '*'; + $this->scanner->nextToken(); + } + if ($this->scanner->token === Token::pipe) { + // Skip this. It's a global namespace. 
+ $this->scanner->nextToken(); + $this->consumeWhitespace(); + } + + $attrName = $this->scanner->getNameString(); + $this->consumeWhitespace(); + + // Check for namespace attribute: ns|attr. We have to peek() to make + // sure that we haven't hit the |= operator, which looks the same. + if ($this->scanner->token === Token::pipe && $this->scanner->peek() !== '=') { + // We have a namespaced attribute. + $ns = $attrName; + $this->scanner->nextToken(); + $attrName = $this->scanner->getNameString(); + $this->consumeWhitespace(); + } + + // Note: We require that operators do not have spaces + // between characters, e.g. ~= , not ~ =. + + // Get the operator: + switch ($this->scanner->token) { + case Token::eq: + $this->consumeWhitespace(); + $op = EventHandler::isExactly; + break; + case Token::tilde: + if ($this->scanner->nextToken() !== Token::eq) { + $this->throwError(Token::eq, $this->scanner->token); + } + $op = EventHandler::containsWithSpace; + break; + case Token::pipe: + if ($this->scanner->nextToken() !== Token::eq) { + $this->throwError(Token::eq, $this->scanner->token); + } + $op = EventHandler::containsWithHyphen; + break; + case Token::star: + if ($this->scanner->nextToken() !== Token::eq) { + $this->throwError(Token::eq, $this->scanner->token); + } + $op = EventHandler::containsInString; + break; + case Token::dollar; + if ($this->scanner->nextToken() !== Token::eq) { + $this->throwError(Token::eq, $this->scanner->token); + } + $op = EventHandler::endsWith; + break; + case Token::carat: + if ($this->scanner->nextToken() !== Token::eq) { + $this->throwError(Token::eq, $this->scanner->token); + } + $op = EventHandler::beginsWith; + break; + } + + if (isset($op)) { + // Consume '=' and go on. + $this->scanner->nextToken(); + $this->consumeWhitespace(); + + // So... here we have a problem. 
The grammer suggests that the + // value here is String1 or String2, both of which are enclosed + // in quotes of some sort, and both of which allow lots of special + // characters. But the spec itself includes examples like this: + // [lang=fr] + // So some bareword support is assumed. To get around this, we assume + // that bare words follow the NAME rules, while quoted strings follow + // the String1/String2 rules. + + if ($this->scanner->token === Token::quote || $this->scanner->token === Token::squote) { + $attrVal = $this->scanner->getQuotedString(); + } + else { + $attrVal = $this->scanner->getNameString(); + } + + if ($this->DEBUG) { + print "ATTR: $attrVal AND OP: $op\n"; + } + } + + $this->consumeWhitespace(); + + if ($this->scanner->token != Token::rsquare) { + $this->throwError(Token::rsquare, $this->scanner->token); + } + + if (isset($ns)) { + $this->handler->attributeNS($attrName, $ns, $attrVal, $op); + } + elseif (isset($attrVal)) { + $this->handler->attribute($attrName, $attrVal, $op); + } + else { + $this->handler->attribute($attrName); + } + $this->scanner->nextToken(); + } + } + + /** + * Utility for throwing a consistantly-formatted parse error. + */ + private function throwError($expected, $got) { + $filter = sprintf('Expected %s, got %s', Token::name($expected), Token::name($got)); + throw new ParseException($filter); + } + +} + diff --git a/lib/querypath/src/QueryPath/CSS/QueryPathEventHandler.php b/lib/querypath/src/QueryPath/CSS/QueryPathEventHandler.php new file mode 100644 index 0000000..2dcfd57 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/QueryPathEventHandler.php @@ -0,0 +1,1424 @@ +stdClass objects with a text property (QP > 1.3) + * instead of elements. + * - The pseudo-classes first-of-type, nth-of-type and last-of-type may or may + * not conform to the specification. The spec is unclear. + * - pseudo-class filters of the form -an+b do not function as described in the + * specification. 
However, they do behave the same way here as they do in + * jQuery. + * - This library DOES provide XML namespace aware tools. Selectors can use + * namespaces to increase specificity. + * - This library does nothing with the CSS 3 Selector specificity rating. Of + * course specificity is preserved (to the best of our abilities), but there + * is no calculation done. + * + * For detailed examples of how the code works and what selectors are supported, + * see the CssEventTests file, which contains the unit tests used for + * testing this implementation. + * + * @author M Butcher + * @license MIT + */ + +namespace QueryPath\CSS; + +/** + * Handler that tracks progress of a query through a DOM. + * + * The main idea is that we keep a copy of the tree, and then use an + * array to keep track of matches. To handle a list of selectors (using + * the comma separator), we have to track both the currently progressing + * match and the previously matched elements. + * + * To use this handler: + * @code + * $filter = '#id'; // Some CSS selector + * $handler = new QueryPathEventHandler(DOMNode $dom); + * $parser = new Parser(); + * $parser->parse($filter, $handler); + * $matches = $handler->getMatches(); + * @endcode + * + * $matches will be an array of zero or more DOMElement objects. + * + * @ingroup querypath_css + */ +class QueryPathEventHandler implements EventHandler, Traverser { + protected $dom = NULL; // Always points to the top level. + protected $matches = NULL; // The matches + protected $alreadyMatched = NULL; // Matches found before current selector. + protected $findAnyElement = TRUE; + + + /** + * Create a new event handler. 
+   *
+   * @param mixed $dom
+   *   The search context: an array or SplObjectStorage of DOM nodes (items
+   *   that are not element nodes are ignored), a DOMDocument (its document
+   *   element is used), a DOMElement, or a DOMNodeList (element nodes only).
+   * @throws \QueryPath\Exception
+   *   If $dom is none of the supported types.
+   */
+  public function __construct($dom) {
+    $this->alreadyMatched = new \SplObjectStorage();
+    $matches = new \SplObjectStorage();
+
+    // Array (or object storage) of DOM nodes: keep the element nodes only.
+    if (is_array($dom) || $dom instanceof \SplObjectStorage) {
+      foreach ($dom as $item) {
+        if ($item instanceof \DOMNode && $item->nodeType == XML_ELEMENT_NODE) {
+          $matches->attach($item);
+        }
+      }
+      // The first element (if there is one) becomes the top-level context.
+      if ($matches->count() > 0) {
+        $matches->rewind();
+        $this->dom = $matches->current();
+      }
+      else {
+        $this->dom = NULL;
+      }
+    }
+    // DOM Document -- we get the root element.
+    elseif ($dom instanceof \DOMDocument) {
+      $this->dom = $dom->documentElement;
+      // An empty document has no root element, and attach() rejects NULL.
+      if ($dom->documentElement !== NULL) {
+        $matches->attach($dom->documentElement);
+      }
+    }
+    // DOM Element -- we use this directly
+    elseif ($dom instanceof \DOMElement) {
+      $this->dom = $dom;
+      $matches->attach($dom);
+    }
+    // NodeList -- the element nodes are copied into a plain array, which is
+    // what $this->dom holds in this case.
+    elseif ($dom instanceof \DOMNodeList) {
+      $a = array();
+      foreach ($dom as $item) {
+        if ($item->nodeType == XML_ELEMENT_NODE) {
+          $matches->attach($item);
+          $a[] = $item;
+        }
+      }
+      $this->dom = $a;
+    }
+    // FIXME: Handle SimpleXML!
+    // Uh-oh... we don't support anything else.
+    else {
+      // get_class() only accepts objects; scalars and NULL must be described
+      // with gettype() so that the intended exception is what gets thrown.
+      $type = is_object($dom) ? get_class($dom) : gettype($dom);
+      throw new \QueryPath\Exception("Unhandled type: " . $type);
+    }
+    $this->matches = $matches;
+  }
+
+  /**
+   * Generic finding method.
+   *
+   * This is the primary searching method used throughout QueryPath.
+   * It builds a Parser for the filter with this object as the event handler
+   * and runs it.
+   *
+   * @param string $filter
+   *   A valid CSS 3 filter.
+   * @return QueryPathEventHandler
+   *   Returns itself.
+   */
+  public function find($filter) {
+    $parser = new Parser($filter, $this);
+    $parser->parse();
+    return $this;
+  }
+
+  /**
+   * Get the elements that match the evaluated selector.
+   *
+   * This should be called after the filter has been parsed.
+   *
+   * @return array
+   *   The matched items.
+   *   This is almost always an array of
+   *   {@link DOMElement} objects. It is always an instance of
+   *   {@link DOMNode} objects. Note that the container built below is an
+   *   SplObjectStorage, not a plain PHP array.
+   */
+  public function getMatches() {
+    // Previously completed selectors first, then the one in progress.
+    $result = new \SplObjectStorage();
+    foreach ($this->alreadyMatched as $m) {
+      $result->attach($m);
+    }
+    foreach ($this->matches as $m) {
+      $result->attach($m);
+    }
+    return $result;
+  }
+
+  /**
+   * Alias of getMatches().
+   */
+  public function matches() {
+    return $this->getMatches();
+  }
+
+  /**
+   * Find any element with the ID that matches $id.
+   *
+   * If this finds an ID, it will immediately quit. Essentially, it doesn't
+   * enforce ID uniqueness, but it assumes it.
+   *
+   * @param $id
+   *   String ID for an element.
+   */
+  public function elementID($id) {
+    $found = new \SplObjectStorage();
+    $matches = $this->candidateList();
+    foreach ($matches as $item) {
+      // Check if any of the current items has the desired ID.
+      if ($item->hasAttribute('id') && $item->getAttribute('id') === $id) {
+        $found->attach($item);
+        break;
+      }
+    }
+    $this->matches = $found;
+    $this->findAnyElement = FALSE;
+  }
+
+  /**
+   * Keep the candidates whose tag name equals $name.
+   *
+   * Only the candidates themselves are tested; their descendants are not
+   * searched here.
+   *
+   * @param string $name
+   *   The tag name to match.
+   */
+  public function element($name) {
+    $matches = $this->candidateList();
+    $this->findAnyElement = FALSE;
+    $found = new \SplObjectStorage();
+    foreach ($matches as $item) {
+      // Should the existing item be included?
+      // In some cases (e.g. element is root element)
+      // it definitely should. But what about other cases?
+      if ($item->tagName == $name) {
+        $found->attach($item);
+      }
+    }
+
+    $this->matches = $found;
+  }
+
+  /**
+   * Find elements by local name within a namespace prefix.
+   *
+   * Each candidate is tested itself, and its descendants are searched as
+   * well.
+   *
+   * @param string $lname
+   *   The local name of the element.
+   * @param string $namespace
+   *   The namespace prefix as written in the selector.
+   */
+  public function elementNS($lname, $namespace = NULL) {
+    $this->findAnyElement = FALSE;
+    $found = new \SplObjectStorage();
+    $matches = $this->candidateList();
+
+    // Looking up NS URI only works if the XMLNS attributes are declared
+    // at a level equal to or above the searching doc. Normalizing a doc
+    // should fix this, but it doesn't. So we have to use a fallback
+    // detection scheme which basically searches by lname and then
+    // does a post hoc check on the tagname.
+    //
+    // The constructor may leave $this->dom as a single node, an array of
+    // nodes, or NULL, so the lookup has to cope with all three. The result
+    // does not depend on the candidate, so it is resolved once, up front.
+    $context = is_array($this->dom) ? reset($this->dom) : $this->dom;
+    $nsuri = ($context instanceof \DOMNode) ? $context->lookupNamespaceURI($namespace) : NULL;
+
+    foreach ($matches as $item) {
+      // XXX: Presumably the base item needs to be checked. Spec isn't
+      // too clear, but there are three possibilities:
+      //  - base should always be checked (what we do here)
+      //  - base should never be checked (only children)
+      //  - base should only be checked if it is the root node
+      if ($item instanceof \DOMNode
+          && $item->namespaceURI == $nsuri
+          && $lname == $item->localName) {
+        $found->attach($item);
+      }
+
+      if (!empty($nsuri)) {
+        $nl = $item->getElementsByTagNameNS($nsuri, $lname);
+        // If something is found, merge them:
+        if (!empty($nl)) {
+          $this->attachNodeList($nl, $found);
+        }
+      }
+      else {
+        // No URI could be resolved: search by local name and then compare
+        // the full "prefix:lname" tag name.
+        $tagname = $namespace . ':' . $lname;
+        foreach ($item->getElementsByTagName($lname) as $node) {
+          if ($node->tagName == $tagname) {
+            $found->attach($node);
+          }
+        }
+      }
+    }
+    $this->matches = $found;
+  }
+
+  /**
+   * Handle the universal selector: every candidate is kept.
+   */
+  public function anyElement() {
+    $found = new \SplObjectStorage();
+    $matches = $this->candidateList();
+    foreach ($matches as $item) {
+      $found->attach($item); // Add self
+      // See issue #20 or section 6.2 of this:
+      // http://www.w3.org/TR/2009/PR-css3-selectors-20091215/#universal-selector
+      //$nl = $item->getElementsByTagName('*');
+      //$this->attachNodeList($nl, $found);
+    }
+
+    $this->matches = $found;
+    $this->findAnyElement = FALSE;
+  }
+
+  /**
+   * Keep the candidates whose namespace URI is the one bound to prefix $ns.
+   *
+   * If the prefix cannot be resolved to a URI, nothing matches.
+   *
+   * @param string $ns
+   *   The namespace prefix as written in the selector.
+   */
+  public function anyElementInNS($ns) {
+    // $this->dom may be a single node, an array of nodes, or NULL.
+    $context = is_array($this->dom) ? reset($this->dom) : $this->dom;
+    $nsuri = ($context instanceof \DOMNode) ? $context->lookupNamespaceURI($ns) : NULL;
+    $found = new \SplObjectStorage();
+    if (!empty($nsuri)) {
+      $matches = $this->candidateList();
+      foreach ($matches as $item) {
+        if ($item instanceOf \DOMNode && $nsuri == $item->namespaceURI) {
+          $found->attach($item);
+        }
+      }
+    }
+    $this->matches = $found;
+    $this->findAnyElement = FALSE;
+  }
+
+  /**
+   * Keep the candidates that carry the CSS class $name.
+   *
+   * @param string $name
+   *   The class name to look for in the "class" attribute.
+   */
+  public function elementClass($name) {
+    $found = new \SplObjectStorage();
+    $matches = $this->candidateList();
+    foreach ($matches as $item) {
+      if ($item->hasAttribute('class')) {
+        // Class names may be separated by any run of whitespace (tabs and
+        // line breaks included), not just by a single space.
+        $classes = preg_split('/[ \t\r\n\f]+/', $item->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
+        // Strict comparison, so that e.g. "1" does not match "01".
+        if (in_array($name, $classes, TRUE)) {
+          $found->attach($item);
+        }
+      }
+    }
+
+    $this->matches = $found;
+    $this->findAnyElement = FALSE;
+  }
+
+  /**
+   * Keep the candidates that have attribute $name.
+   *
+   * @param string $name
+   *   The attribute name.
+   * @param string $value
+   *   If set, the attribute value must also satisfy $operation (as decided
+   *   by attrValMatches()).
+   * @param int $operation
+   *   One of the EventHandler comparison constants.
+   */
+  public function attribute($name, $value = NULL, $operation = EventHandler::isExactly) {
+    $found = new \SplObjectStorage();
+    $matches = $this->candidateList();
+    foreach ($matches as $item) {
+      if ($item->hasAttribute($name)) {
+        if (isset($value)) {
+          // If a value exists, then we need a match.
+          if($this->attrValMatches($value, $item->getAttribute($name), $operation)) {
+            $found->attach($item);
+          }
+        }
+        else {
+          // If no value exists, then we consider it a match.
+          $found->attach($item);
+        }
+      }
+    }
+    $this->matches = $found;
+    $this->findAnyElement = FALSE;
+  }
+
+  /**
+   * Helper function to find all elements with exact matches.
+   *
+   * @param string $name
+   *   The attribute that a candidate must have.
+   * @param string $value
+   *   If set, the attribute value must equal this (loose comparison).
+   *
+   * @deprecated All use cases seem to be covered by attribute().
+   */
+  protected function searchForAttr($name, $value = NULL) {
+    $found = new \SplObjectStorage();
+    $matches = $this->candidateList();
+    foreach ($matches as $candidate) {
+      if (!$candidate->hasAttribute($name)) {
+        continue;
+      }
+      // A match on name alone is enough only when no value was requested.
+      // When a value is requested, a candidate with a different value must
+      // be rejected rather than falling through to the name-only case.
+      if (!isset($value) || $value == $candidate->getAttribute($name)) {
+        $found->attach($candidate);
+      }
+    }
+
+    $this->matches = $found;
+  }
+
+  /**
+   * Keep the candidates that have attribute $lname in namespace prefix $ns.
+   *
+   * The prefix is resolved to a URI against the first candidate.
+   *
+   * @param string $lname
+   *   The local name of the attribute.
+   * @param string $ns
+   *   The namespace prefix as written in the selector.
+   * @param string $value
+   *   If set, the attribute value must also satisfy $operation.
+   * @param int $operation
+   *   One of the EventHandler comparison constants.
+   */
+  public function attributeNS($lname, $ns, $value = NULL, $operation = EventHandler::isExactly) {
+    $matches = $this->candidateList();
+    $found = new \SplObjectStorage();
+    if (count($matches) == 0) {
+      $this->matches = $found;
+      // Same bookkeeping as every other handler does on an empty result.
+      $this->findAnyElement = FALSE;
+      return;
+    }
+
+    // Get the namespace URI for the given label.
+    $matches->rewind();
+    $e = $matches->current();
+    $uri = $e->lookupNamespaceURI($ns);
+
+    foreach ($matches as $item) {
+      if ($item->hasAttributeNS($uri, $lname)) {
+        if (isset($value)) {
+          if ($this->attrValMatches($value, $item->getAttributeNS($uri, $lname), $operation)) {
+            $found->attach($item);
+          }
+        }
+        else {
+          $found->attach($item);
+        }
+      }
+    }
+    $this->matches = $found;
+    $this->findAnyElement = FALSE;
+  }
+
+  /**
+   * This also supports the following nonstandard pseudo classes:
+   *  - :x-reset/:x-root (reset to the main item passed into the constructor.
+   *    Less drastic than :root)
+   *  - :odd/:even (shorthand for :nth-child(odd)/:nth-child(even))
+   *
+   * @param string $name
+   *   The pseudo-class name; it is lower-cased before dispatch.
+   * @param string $value
+   *   The argument given in parentheses, if any.
+   * @throws NotImplementedException
+   *   For :indeterminate, and for :lang without a value.
+   * @throws ParseException
+   *   For :not without a value and for unknown pseudo-classes.
+   */
+  public function pseudoClass($name, $value = NULL) {
+    $name = strtolower($name);
+    // Need to handle known pseudoclasses.
+    switch($name) {
+      case 'visited':
+      case 'hover':
+      case 'active':
+      case 'focus':
+      case 'animated': //  Last 3 are from jQuery
+      case 'visible':
+      case 'hidden':
+        // These require a UA, which we don't have.
+      case 'target':
+        // This requires a location URL, which we don't have.
+        $this->matches = new \SplObjectStorage();
+        break;
+      case 'indeterminate':
+        // The assumption is that there is a UA and the format is HTML.
+        // I don't know if this should is useful without a UA.
+        throw new NotImplementedException(":indeterminate is not implemented.");
+      case 'lang':
+        // No value = exception.
+        if (!isset($value)) {
+          throw new NotImplementedException("No handler for lang pseudoclass without value.");
+        }
+        $this->lang($value);
+        break;
+      case 'link':
+        $this->searchForAttr('href');
+        break;
+      case 'root':
+        $found = new \SplObjectStorage();
+        if (empty($this->dom)) {
+          $this->matches = $found;
+        }
+        elseif (is_array($this->dom)) {
+          $found->attach($this->dom[0]->ownerDocument->documentElement);
+          $this->matches = $found;
+        }
+        elseif ($this->dom instanceof \DOMNode) {
+          $found->attach($this->dom->ownerDocument->documentElement);
+          $this->matches = $found;
+        }
+        elseif ($this->dom instanceof \DOMNodeList && $this->dom->length > 0) {
+          $found->attach($this->dom->item(0)->ownerDocument->documentElement);
+          $this->matches = $found;
+        }
+        else {
+          // Hopefully we never get here:
+          $found->attach($this->dom);
+          $this->matches = $found;
+        }
+        break;
+
+      // NON-STANDARD extensions for reseting to the "top" items set in
+      // the constructor.
+      case 'x-root':
+      case 'x-reset':
+        $this->matches = new \SplObjectStorage();
+        // $this->dom can be a single node, an array of nodes, or NULL;
+        // attach() only accepts objects, so each case is normalised first.
+        $top = is_array($this->dom) ? $this->dom : array($this->dom);
+        foreach ($top as $node) {
+          if (is_object($node)) {
+            $this->matches->attach($node);
+          }
+        }
+        break;
+
+      // NON-STANDARD extensions for simple support of even and odd. These
+      // are supported by jQuery, FF, and other user agents.
+      case 'even':
+        $this->nthChild(2, 0);
+        break;
+      case 'odd':
+        $this->nthChild(2, 1);
+        break;
+
+      // Standard child-checking items.
+      case 'nth-child':
+        list($aVal, $bVal) = $this->parseAnB($value);
+        $this->nthChild($aVal, $bVal);
+        break;
+      case 'nth-last-child':
+        list($aVal, $bVal) = $this->parseAnB($value);
+        $this->nthLastChild($aVal, $bVal);
+        break;
+      case 'nth-of-type':
+        list($aVal, $bVal) = $this->parseAnB($value);
+        $this->nthOfTypeChild($aVal, $bVal, FALSE);
+        break;
+      case 'nth-last-of-type':
+        list($aVal, $bVal) = $this->parseAnB($value);
+        $this->nthLastOfTypeChild($aVal, $bVal);
+        break;
+      case 'first-child':
+        $this->nthChild(0, 1);
+        break;
+      case 'last-child':
+        $this->nthLastChild(0, 1);
+        break;
+      case 'first-of-type':
+        $this->firstOfType();
+        break;
+      case 'last-of-type':
+        $this->lastOfType();
+        break;
+      case 'only-child':
+        $this->onlyChild();
+        break;
+      case 'only-of-type':
+        $this->onlyOfType();
+        break;
+      case 'empty':
+        $this->emptyElement();
+        break;
+      case 'not':
+        if (empty($value)) {
+          throw new ParseException(":not() requires a value.");
+        }
+        $this->not($value);
+        break;
+      // Additional pseudo-classes defined in jQuery:
+      case 'lt':
+      case 'gt':
+      case 'nth':
+      case 'eq':
+      case 'first':
+      case 'last':
+      //case 'even':
+      //case 'odd':
+        $this->getByPosition($name, $value);
+        break;
+      case 'parent':
+        $matches = $this->candidateList();
+        $found = new \SplObjectStorage();
+        foreach ($matches as $match) {
+          if (!empty($match->firstChild)) {
+            $found->attach($match);
+          }
+        }
+        $this->matches = $found;
+        break;
+
+      case 'enabled':
+      case 'disabled':
+      case 'checked':
+        $this->attribute($name);
+        break;
+      case 'text':
+      case 'radio':
+      case 'checkbox':
+      case 'file':
+      case 'password':
+      case 'submit':
+      case 'image':
+      case 'reset':
+      case 'button':
+        $this->attribute('type', $name);
+        break;
+
+      case 'header':
+        $matches = $this->candidateList();
+        $found = new \SplObjectStorage();
+        foreach ($matches as $item) {
+          $tag = $item->tagName;
+          $f = strtolower(substr($tag, 0, 1));
+          if ($f == 'h' && strlen($tag) == 2 && ctype_digit(substr($tag, 1, 1))) {
+            $found->attach($item);
+          }
+        }
+        $this->matches = $found;
+        break;
+      case 'has':
+        $this->has($value);
+        break;
+      // Contains == text matches.
+      // In QP 2.1, this was changed.
+      case 'contains':
+        $value = $this->removeQuotes($value);
+
+        $matches = $this->candidateList();
+        $found = new \SplObjectStorage();
+        foreach ($matches as $item) {
+          if (strpos($item->textContent, $value) !== FALSE) {
+            $found->attach($item);
+          }
+        }
+        $this->matches = $found;
+        break;
+
+      // Since QP 2.1
+      case 'contains-exactly':
+        $value = $this->removeQuotes($value);
+
+        $matches = $this->candidateList();
+        $found = new \SplObjectStorage();
+        foreach ($matches as $item) {
+          if ($item->textContent == $value) {
+            $found->attach($item);
+          }
+        }
+        $this->matches = $found;
+        break;
+      default:
+        throw new ParseException("Unknown Pseudo-Class: " . $name);
+    }
+    $this->findAnyElement = FALSE;
+  }
+
+  /**
+   * Remove leading and trailing quotes.
+   *
+   * The quotes are stripped only when the first and last characters are the
+   * same quote character (single or double).
+   */
+  private function removeQuotes($str) {
+    $f = substr($str, 0, 1);
+    $l = substr($str, -1);
+    if ($f === $l && ($f == '"' || $f == "'")) {
+      $str = substr($str, 1, -1);
+    }
+    return $str;
+  }
+
+  /**
+   * Pseudo-class handler for a variety of jQuery pseudo-classes.
+   * Handles lt, gt, eq, nth, first, last pseudo-classes.
+   *
+   * @param string $operator
+   *   One of 'lt', 'gt', 'eq', 'nth', 'first', 'last'.
+   * @param int $pos
+   *   The 1-based position used by lt, gt, eq and nth.
+   */
+  private function getByPosition($operator, $pos) {
+    $matches = $this->candidateList();
+    $found = new \SplObjectStorage();
+    if ($matches->count() == 0) {
+      // No candidates means no matches; record that instead of leaving the
+      // previous match set in place.
+      $this->matches = $found;
+      return;
+    }
+
+    switch ($operator) {
+      case 'nth':
+      case 'eq':
+        if ($matches->count() >= $pos) {
+          foreach ($matches as $match) {
+            // CSS is 1-based, so we pre-increment.
+            if ($matches->key() + 1 == $pos) {
+              $found->attach($match);
+              break;
+            }
+          }
+        }
+        break;
+      case 'first':
+        if ($matches->count() > 0) {
+          $matches->rewind(); // This is necessary to init.
+          $found->attach($matches->current());
+        }
+        break;
+      case 'last':
+        if ($matches->count() > 0) {
+
+          // Spin through iterator.
+          foreach ($matches as $item) {};
+
+          $found->attach($item);
+        }
+        break;
+      // case 'even':
+      //   for ($i = 1; $i <= count($matches); ++$i) {
+      //     if ($i % 2 == 0) {
+      //       $found[] = $matches[$i];
+      //     }
+      //   }
+      //   break;
+      // case 'odd':
+      //   for ($i = 1; $i <= count($matches); ++$i) {
+      //     if ($i % 2 == 0) {
+      //       $found[] = $matches[$i];
+      //     }
+      //   }
+      //   break;
+      case 'lt':
+        $i = 0;
+        foreach ($matches as $item) {
+          if (++$i < $pos) {
+            $found->attach($item);
+          }
+        }
+        break;
+      case 'gt':
+        $i = 0;
+        foreach ($matches as $item) {
+          if (++$i > $pos) {
+            $found->attach($item);
+          }
+        }
+        break;
+    }
+
+    $this->matches = $found;
+  }
+
+  /**
+   * Parse an an+b rule for CSS pseudo-classes.
+   *
+   * Accepted forms are the keywords 'even' and 'odd', a plain number (b
+   * only), and an+b where a may be omitted ('n+1'), be a bare sign ('-n+3',
+   * '+n') or a signed integer, and where whitespace may surround the sign
+   * in front of b ('2n + 1').
+   *
+   * @param $rule
+   *   Some rule in the an+b format.
+   * @return
+   *   Array (list($aVal, $bVal)) of the two values. A rule that cannot be
+   *   parsed yields array(0, 0); no exception is raised for it.
+   */
+  protected function parseAnB($rule) {
+    $rule = trim((string) $rule);
+    $keyword = strtolower($rule);
+
+    if ($keyword === 'even') {
+      return array(2, 0);
+    }
+    if ($keyword === 'odd') {
+      return array(2, 1);
+    }
+    if (is_numeric($rule)) {
+      return array(0, (int) $rule);
+    }
+
+    // [sign][digits]n, optionally followed by a sign and digits for b.
+    $parts = array();
+    if (!preg_match('/^([+-]?)(\d*)n(?:\s*([+-])\s*(\d+))?$/i', $rule, $parts)) {
+      return array(0, 0);
+    }
+
+    // A missing coefficient means 1: 'n' is 1n and '-n' is -1n.
+    $aVal = ($parts[2] === '') ? 1 : (int) $parts[2];
+    if ($parts[1] === '-') {
+      $aVal = -$aVal;
+    }
+
+    // The b part is optional; its capture groups are absent when omitted.
+    $bVal = 0;
+    if (isset($parts[4])) {
+      $bVal = (int) $parts[4];
+      if ($parts[3] === '-') {
+        $bVal = -$bVal;
+      }
+    }
+    return array($aVal, $bVal);
+  }
+
+  /**
+   * Pseudo-class handler for nth-child and all related pseudo-classes.
+   *
+   * @param int $groupSize
+   *   The size of the group (in an+b, this is a).
+   * @param int $elementInGroup
+   *   The offset in a group. (in an+b this is b).
+   * @param boolean $lastChild
+   *   Whether counting should begin with the last child. By default, this is false.
+   *   Pseudo-classes that start with the last-child can set this to true.
+   */
+  protected function nthChild($groupSize, $elementInGroup, $lastChild = FALSE) {
+    $found = new \SplObjectStorage();
+
+    // Sibling positions are kept in local storage rather than written onto
+    // the DOM nodes as ad hoc properties. Layout:
+    //   parent node => array(tag key => array(position map, sibling count))
+    $cache = new \SplObjectStorage();
+
+    foreach ($this->matches as $item) {
+      $parent = $item->parentNode;
+      if (!($parent instanceof \DOMNode)) {
+        // Without a parent there are no siblings to count against.
+        continue;
+      }
+
+      // We only want nodes, and if this call is preceded by an element
+      // selector, we only want to count elements with the same tag name.
+      // !!! This last part is a grey area in the CSS 3 Selector spec. It seems
+      // necessary to make the implementation match the examples in the spec. However,
+      // jQuery 1.2 does not do this.
+      //
+      // Because the count depends on the tag name, positions are cached per
+      // parent AND per tag, so that two matches under one parent that have
+      // different tag names each get their own numbering.
+      $tagKey = $this->findAnyElement ? '*' : $item->tagName;
+      $byTag = $cache->contains($parent) ? $cache[$parent] : array();
+      if (!isset($byTag[$tagKey])) {
+        $positions = new \SplObjectStorage();
+        $c = 0;
+        foreach ($parent->childNodes as $child) {
+          if ($child->nodeType == XML_ELEMENT_NODE && ($this->findAnyElement || $child->tagName == $item->tagName)) {
+            $positions[$child] = ++$c;
+          }
+        }
+        $byTag[$tagKey] = array($positions, $c);
+        $cache[$parent] = $byTag;
+      }
+      list($positions, $total) = $byTag[$tagKey];
+
+      if (!$positions->contains($item)) {
+        // Not an element child of its parent, so it has no position.
+        continue;
+      }
+      $position = $positions[$item];
+
+      // If we are looking for the last child, we count from the end of a list.
+      // Note that we add 1 because CSS indices begin at 1, not 0.
+      $indexToMatch = $lastChild ? $total - $position + 1 : $position;
+
+      // If group size is 0, then we return element at the right index.
+      if ($groupSize == 0) {
+        if ($indexToMatch == $elementInGroup) {
+          $found->attach($item);
+        }
+      }
+      // If group size != 0, then we grab nth element from group offset by
+      // element in group.
+      else {
+        if (($indexToMatch - $elementInGroup) % $groupSize == 0
+            && ($indexToMatch - $elementInGroup) / $groupSize >= 0) {
+          $found->attach($item);
+        }
+      }
+    }
+    $this->matches = $found;
+  }
+
+  /**
+   * Reverse a set of matches.
+   *
+   * This is now necessary because internal matches are no longer represented
+   * as arrays.
+   * @since QueryPath 2.0
+   *//*
+  private function reverseMatches() {
+    // Reverse the candidate list. There must be a better way of doing
+    // this.
+    $arr = array();
+    foreach ($this->matches as $m) array_unshift($arr, $m);
+
+    $this->found = new \SplObjectStorage();
+    foreach ($arr as $item) $this->found->attach($item);
+  }*/
+
+  /**
+   * Pseudo-class handler for :nth-last-child and related pseudo-classes.
+   *
+   * Delegates to nthChild() with counting started from the last child.
+   */
+  protected function nthLastChild($groupSize, $elementInGroup) {
+    // New in Quark.
+    $this->nthChild($groupSize, $elementInGroup, TRUE);
+  }
+
+  /**
+   * Get a list of peer elements.
+   * If $requireSameTag is TRUE, then only peer elements with the same
+   * tagname as the given element will be returned.
+   *
+   * @param $element
+   *   A DomElement.
+   * @param $requireSameTag
+   *   Boolean flag indicating whether all matches should have the same
+   *   element name (tagName) as $element.
+   * @return
+   *   Array of peer elements.
+   *//*
+  protected function listPeerElements($element, $requireSameTag = FALSE) {
+    $peers = array();
+    $parent = $element->parentNode;
+    foreach ($parent->childNodes as $node) {
+      if ($node->nodeType == XML_ELEMENT_NODE) {
+        if ($requireSameTag) {
+          // Need to make sure that the tag matches:
+          if ($element->tagName == $node->tagName) {
+            $peers[] = $node;
+          }
+        }
+        else {
+          $peers[] = $node;
+        }
+      }
+    }
+    return $peers;
+  }
+  */
+  /**
+   * Get the nth child (by index) from matching candidates.
+ * + * This is used by pseudo-class handlers. + */ + /* + protected function childAtIndex($index, $tagName = NULL) { + $restrictToElement = !$this->findAnyElement; + $matches = $this->candidateList(); + $defaultTagName = $tagName; + + // XXX: Added in Quark: I believe this should return an empty + // match set if no child was found tat the index. + $this->matches = new \SplObjectStorage(); + + foreach ($matches as $item) { + $parent = $item->parentNode; + + // If a default tag name is supplied, we always use it. + if (!empty($defaultTagName)) { + $tagName = $defaultTagName; + } + // If we are inside of an element selector, we use the + // tag name of the given elements. + elseif ($restrictToElement) { + $tagName = $item->tagName; + } + // Otherwise, we skip the tag name match. + else { + $tagName = NULL; + } + + // Loop through all children looking for matches. + $i = 0; + foreach ($parent->childNodes as $child) { + if ($child->nodeType !== XML_ELEMENT_NODE) { + break; // Skip non-elements + } + + // If type is set, then we do type comparison + if (!empty($tagName)) { + // Check whether tag name matches the type. + if ($child->tagName == $tagName) { + // See if this is the index we are looking for. + if ($i == $index) { + //$this->matches = new \SplObjectStorage(); + $this->matches->attach($child); + return; + } + // If it's not the one we are looking for, increment. + ++$i; + } + } + // We don't care about type. Any tagName will match. + else { + if ($i == $index) { + $this->matches->attach($child); + return; + } + ++$i; + } + } // End foreach + } + + }*/ + + /** + * Pseudo-class handler for nth-of-type-child. + * Not implemented. + */ + protected function nthOfTypeChild($groupSize, $elementInGroup, $lastChild) { + // EXPERIMENTAL: New in Quark. This should be substantially faster + // than the old (jQuery-ish) version. It still has E_STRICT violations + // though. 
+ $parents = new \SplObjectStorage(); + $matches = new \SplObjectStorage(); + + $i = 0; + foreach ($this->matches as $item) { + $parent = $item->parentNode; + + // Build up an array of all of children of this parent, and store the + // index of each element for reference later. We only need to do this + // once per parent, though. + if (!$parents->contains($parent)) { + + $c = 0; + foreach ($parent->childNodes as $child) { + // This doesn't totally make sense, since the CSS 3 spec does not require that + // this pseudo-class be adjoined to an element (e.g. ' :nth-of-type' is allowed). + if ($child->nodeType == XML_ELEMENT_NODE && $child->tagName == $item->tagName) { + // This may break E_STRICT. + $child->nodeIndex = ++$c; + } + } + // This may break E_STRICT. + $parent->numElements = $c; + $parents->attach($parent); + } + + // If we are looking for the last child, we count from the end of a list. + // Note that we add 1 because CSS indices begin at 1, not 0. + if ($lastChild) { + $indexToMatch = $item->parentNode->numElements - $item->nodeIndex + 1; + } + // Otherwise we count from the beginning of the list. + else { + $indexToMatch = $item->nodeIndex; + } + + // If group size is 0, then we return element at the right index. + if ($groupSize == 0) { + if ($indexToMatch == $elementInGroup) + $matches->attach($item); + } + // If group size != 0, then we grab nth element from group offset by + // element in group. + else { + if (($indexToMatch - $elementInGroup) % $groupSize == 0 + && ($indexToMatch - $elementInGroup) / $groupSize >= 0) { + $matches->attach($item); + } + } + + // Iterate. + ++$i; + } + $this->matches = $matches; + } + + /** + * Pseudo-class handler for nth-last-of-type-child. + * Not implemented. 
+ */ + protected function nthLastOfTypeChild($groupSize, $elementInGroup) { + $this->nthOfTypeChild($groupSize, $elementInGroup, TRUE); + } + + /** + * Pseudo-class handler for :lang + */ + protected function lang($value) { + // TODO: This checks for cases where an explicit language is + // set. The spec seems to indicate that an element should inherit + // language from the parent... but this is unclear. + $operator = (strpos($value, '-') !== FALSE) ? self::isExactly : self::containsWithHyphen; + + $orig = $this->matches; + $origDepth = $this->findAnyElement; + + // Do first pass: attributes in default namespace + $this->attribute('lang', $value, $operator); + $lang = $this->matches; // Temp array for merging. + + // Reset + $this->matches = $orig; + $this->findAnyElement = $origDepth; + + // Do second pass: attributes in 'xml' namespace. + $this->attributeNS('lang', 'xml', $value, $operator); + + + // Merge results. + // FIXME: Note that we lose natural ordering in + // the document because we search for xml:lang separately + // from lang. + foreach ($this->matches as $added) $lang->attach($added); + $this->matches = $lang; + } + + /** + * Pseudo-class handler for :not(filter). + * + * This does not follow the specification in the following way: The CSS 3 + * selector spec says the value of not() must be a simple selector. This + * function allows complex selectors. + * + * @param string $filter + * A CSS selector. + */ + protected function not($filter) { + $matches = $this->candidateList(); + //$found = array(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $handler = new QueryPathEventHandler($item); + $not_these = $handler->find($filter)->getMatches(); + if ($not_these->count() == 0) { + $found->attach($item); + } + } + // No need to check for unique elements, since the list + // we began from already had no duplicates. + $this->matches = $found; + } + + /** + * Pseudo-class handler for :has(filter). 
+ * This can also be used as a general filtering routine. + */ + public function has($filter) { + $matches = $this->candidateList(); + //$found = array(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $handler = new QueryPathEventHandler($item); + $these = $handler->find($filter)->getMatches(); + if (count($these) > 0) { + $found->attach($item); + } + } + $this->matches = $found; + return $this; + } + + /** + * Pseudo-class handler for :first-of-type. + */ + protected function firstOfType() { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $type = $item->tagName; + $parent = $item->parentNode; + foreach ($parent->childNodes as $kid) { + if ($kid->nodeType == XML_ELEMENT_NODE && $kid->tagName == $type) { + if (!$found->contains($kid)) { + $found->attach($kid); + } + break; + } + } + } + $this->matches = $found; + } + + /** + * Pseudo-class handler for :last-of-type. + */ + protected function lastOfType() { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $type = $item->tagName; + $parent = $item->parentNode; + for ($i = $parent->childNodes->length - 1; $i >= 0; --$i) { + $kid = $parent->childNodes->item($i); + if ($kid->nodeType == XML_ELEMENT_NODE && $kid->tagName == $type) { + if (!$found->contains($kid)) { + $found->attach($kid); + } + break; + } + } + } + $this->matches = $found; + } + + /** + * Pseudo-class handler for :only-child. + */ + protected function onlyChild() { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach($matches as $item) { + $parent = $item->parentNode; + $kids = array(); + foreach($parent->childNodes as $kid) { + if ($kid->nodeType == XML_ELEMENT_NODE) { + $kids[] = $kid; + } + } + // There should be only one child element, and + // it should be the one being tested. 
+ if (count($kids) == 1 && $kids[0] === $item) { + $found->attach($kids[0]); + } + } + $this->matches = $found; + } + + /** + * Pseudo-class handler for :empty. + */ + protected function emptyElement() { + $found = new \SplObjectStorage(); + $matches = $this->candidateList(); + foreach ($matches as $item) { + $empty = TRUE; + foreach($item->childNodes as $kid) { + // From the spec: Elements and Text nodes are the only ones to + // affect emptiness. + if ($kid->nodeType == XML_ELEMENT_NODE || $kid->nodeType == XML_TEXT_NODE) { + $empty = FALSE; + break; + } + } + if ($empty) { + $found->attach($item); + } + } + $this->matches = $found; + } + + /** + * Pseudo-class handler for :only-of-type. + * + * Keeps every candidate whose parent has no other element child with the + * same tag name. A candidate without a parent node has no peer list to + * inspect and is skipped (it does not match). + */ + protected function onlyOfType() { + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $parent = $item->parentNode; + // A detached node has no childNodes list to scan; skip it instead of + // dereferencing a NULL parent below. + if (!$parent) { + continue; + } + $onlyOfType = TRUE; + + // See if any peers are of the same type + foreach($parent->childNodes as $kid) { + if ($kid->nodeType == XML_ELEMENT_NODE + && $kid->tagName == $item->tagName + && $kid !== $item) { + $onlyOfType = FALSE; + break; + } + } + + // If no others were found, attach this one. + if ($onlyOfType) $found->attach($item); + } + $this->matches = $found; + } + + /** + * Check for attr value matches based on an operation. + */ + protected function attrValMatches($needle, $haystack, $operation) { + + if (strlen($haystack) < strlen($needle)) return FALSE; + + // According to the spec: + // "The case-sensitivity of attribute names in selectors depends on the document language." + // (6.3.2) + // To which I say, "huh?". We assume case sensitivity. 
+ switch ($operation) { + case EventHandler::isExactly: + // Compare as strings: a loose == treats numeric strings such as + // "1" and "1.0" as equal. + return (string) $needle === (string) $haystack; + case EventHandler::containsWithSpace: + return in_array((string) $needle, explode(' ', $haystack), TRUE); + case EventHandler::containsWithHyphen: + return in_array((string) $needle, explode('-', $haystack), TRUE); + case EventHandler::containsInString: + return strpos($haystack, $needle) !== FALSE; + case EventHandler::beginsWith: + return strpos($haystack, $needle) === 0; + case EventHandler::endsWith: + // Plain suffix comparison. The needle used to be interpolated into a + // regular expression unescaped, so values containing '.', '/', '(' + // and similar characters mis-matched or made preg_match() fail. + // An empty needle keeps matching, as the old pattern '/$/' did. + return strlen($needle) == 0 || substr($haystack, -strlen($needle)) === (string) $needle; + } + return FALSE; // Shouldn't be able to get here. + } + + /** + * Handle a pseudo-element. + * + * As the spec mentions, these must be at the end of a selector or + * else they will cause errors. Most selectors return elements. Pseudo-elements + * do not: 'first-line' and 'first-letter' replace the current matches with + * one stdClass object per candidate that has text, each carrying the + * extracted text in its textContent property. 'before', 'after' and + * 'selection' raise a NotImplementedException. + * + * @param string $name + * The pseudo-element name, without the leading colons. + */ + public function pseudoElement($name) { + // process the pseudoElement + switch ($name) { + // XXX: Should this return an array -- first line of + // each of the matched elements? + case 'first-line': + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $str = $item->textContent; + // explode() always yields at least one piece, so index 0 is safe. + $lines = explode("\n", $str); + $line = trim($lines[0]); + // Compare against '' so that a line consisting of "0" is kept. + if ($line !== '') { + // One holder per item: SplObjectStorage keeps each object only + // once, so a shared holder would collapse all results into one. + $o = new \stdClass(); + $o->textContent = $line; + $found->attach($o); + } + } + $this->matches = $found; + break; + // XXX: Should this return an array -- first letter of each + // of the matched elements? + case 'first-letter': + $matches = $this->candidateList(); + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + $str = (string) $item->textContent; + if ($str !== '') { + // Take one whole UTF-8 character rather than one byte; fall back + // to the first byte if the text is not valid UTF-8. + $first = preg_match('/^./su', $str, $m) ? $m[0] : substr($str, 0, 1); + $o = new \stdClass(); + $o->textContent = $first; + $found->attach($o); + } + } + $this->matches = $found; + break; + case 'before': + case 'after': + // There is nothing in a DOM to return for the before and after + // selectors. 
+ case 'selection': + // With no user agent, we don't have a concept of user selection. + throw new NotImplementedException("The $name pseudo-element is not implemented."); + break; + } + $this->findAnyElement = FALSE; + } + /** + * Handle the child combinator: replace the current matches with the + * element children of every current match. + */ + public function directDescendant() { + $this->findAnyElement = FALSE; + + $kids = new \SplObjectStorage(); + foreach ($this->matches as $item) { + $kidsNL = $item->childNodes; + foreach ($kidsNL as $kidNode) { + if ($kidNode->nodeType == XML_ELEMENT_NODE) { + $kids->attach($kidNode); + } + } + } + $this->matches = $kids; + } + /** + * Handle the adjacent-sibling combinator. + * + * For an element to be adjacent to another, it must be THE NEXT NODE + * in the node list. So if an element is surrounded by pcdata, there are + * no adjacent nodes. E.g. in <a/>FOO<b/>, the a and b elements are not + * adjacent. + * + * In a strict DOM parser, line breaks and empty spaces are nodes. That means + * nodes like this will not be adjacent: <foo/> <bar/>. The space between + * them makes them non-adjacent. If this is not the desired behavior, pass + * in the appropriate flags to your parser. Example: + * + * $doc = new DomDocument(); + * $doc->loadXML('<root><foo/> <bar/></root>', LIBXML_NOBLANKS); + * + * NOTE(review): the implementation below is more lenient than this + * description. For each match it walks forward over nextSibling, skipping + * non-element nodes, and attaches the first element sibling it reaches, so + * intervening text nodes do not prevent a match. + */ + public function adjacent() { + $this->findAnyElement = FALSE; + // List of nodes that are immediately adjacent to the current one. + //$found = array(); + $found = new \SplObjectStorage(); + foreach ($this->matches as $item) { + while (isset($item->nextSibling)) { + if (isset($item->nextSibling) && $item->nextSibling->nodeType === XML_ELEMENT_NODE) { + $found->attach($item->nextSibling); + break; + } + $item = $item->nextSibling; + } + } + $this->matches = $found; + } + + /** + * Handle the start of another selector in a selector group. + * + * The current matches are saved into $this->alreadyMatched and matching + * restarts from $this->dom. + */ + public function anotherSelector() { + $this->findAnyElement = FALSE; + // Copy old matches into buffer. + if ($this->matches->count() > 0) { + //$this->alreadyMatched = array_merge($this->alreadyMatched, $this->matches); + foreach ($this->matches as $item) $this->alreadyMatched->attach($item); + } + + // Start over at the top of the tree. 
+ $this->findAnyElement = TRUE; // Reset depth flag. + $this->matches = new \SplObjectStorage(); + $this->matches->attach($this->dom); + } + + /** + * Get all nodes that are siblings to currently selected nodes. + * + * If two passed in items are siblings of each other, neither will + * be included in the list of siblings. Their status as being candidates + * excludes them from being considered siblings. + */ + public function sibling() { + $this->findAnyElement = FALSE; + // Get the nodes at the same level. + + if ($this->matches->count() > 0) { + $sibs = new \SplObjectStorage(); + foreach ($this->matches as $item) { + /*$candidates = $item->parentNode->childNodes; + foreach ($candidates as $candidate) { + if ($candidate->nodeType === XML_ELEMENT_NODE && $candidate !== $item) { + $sibs->attach($candidate); + } + } + */ + while ($item->nextSibling != NULL) { + $item = $item->nextSibling; + if ($item->nodeType === XML_ELEMENT_NODE) $sibs->attach($item); + } + } + $this->matches = $sibs; + } + } + + /** + * Get any descendant. + */ + public function anyDescendant() { + // Get children: + $found = new \SplObjectStorage(); + foreach ($this->matches as $item) { + $kids = $item->getElementsByTagName('*'); + //$found = array_merge($found, $this->nodeListToArray($kids)); + $this->attachNodeList($kids, $found); + } + $this->matches = $found; + + // Set depth flag: + $this->findAnyElement = TRUE; + } + + /** + * Determine what candidates are in the current scope. + * + * This is a utility method that gets the list of elements + * that should be evaluated in the context. If $this->findAnyElement + * is TRUE, this will return a list of every element that appears in + * the subtree of $this->matches. Otherwise, it will just return + * $this->matches. + */ + private function candidateList() { + if ($this->findAnyElement) { + return $this->getAllCandidates($this->matches); + } + return $this->matches; + } + + /** + * Get a list of all of the candidate elements. 
+ * + * This is used when $this->findAnyElement is TRUE. + * @param $elements + * A list of current elements (usually $this->matches). + * + * @return + * A list of all candidate elements. + */ + private function getAllCandidates($elements) { + $found = new \SplObjectStorage(); + foreach ($elements as $item) { + $found->attach($item); // put self in + $nl = $item->getElementsByTagName('*'); + //foreach ($nl as $node) $found[] = $node; + $this->attachNodeList($nl, $found); + } + return $found; + } + /* + public function nodeListToArray($nodeList) { + $array = array(); + foreach ($nodeList as $node) { + if ($node->nodeType == XML_ELEMENT_NODE) { + $array[] = $node; + } + } + return $array; + } + */ + + /** + * Attach all nodes in a node list to the given \SplObjectStorage. + */ + public function attachNodeList(\DOMNodeList $nodeList, \SplObjectStorage $splos) { + foreach ($nodeList as $item) $splos->attach($item); + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/Scanner.php b/lib/querypath/src/QueryPath/CSS/Scanner.php new file mode 100644 index 0000000..3513a0b --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/Scanner.php @@ -0,0 +1,306 @@ +is = $in; + } + + /** + * Return the position of the reader in the string. + */ + public function position() { + return $this->is->position; + } + + /** + * See the next char without removing it from the stack. + * + * @return char + * Returns the next character on the stack. + */ + public function peek() { + return $this->is->peek(); + } + + /** + * Get the next token in the input stream. + * + * This sets the current token to the value of the next token in + * the stream. + * + * @return int + * Returns an int value corresponding to one of the Token constants, + * or FALSE if the end of the string is reached. (Remember to use + * strong equality checking on FALSE, since 0 is a valid token id.) 
+ */ + public function nextToken() { + $tok = -1; + ++$this->it; + if ($this->is->isEmpty()) { + if ($this->recurse) { + throw new \QueryPath\Exception("Recursion error detected at iteration " . $this->it . '.'); + exit(); + } + //print "{$this->it}: All done\n"; + $this->recurse = TRUE; + $this->token = FALSE; + return FALSE; + } + $ch = $this->is->consume(); + //print __FUNCTION__ . " Testing $ch.\n"; + if (ctype_space($ch)) { + $this->value = ' '; // Collapse all WS to a space. + $this->token = $tok = Token::white; + //$ch = $this->is->consume(); + return $tok; + } + + if (ctype_alnum($ch) || $ch == '-' || $ch == '_') { + // It's a character + $this->value = $ch; //strtolower($ch); + $this->token = $tok = Token::char; + return $tok; + } + + $this->value = $ch; + + switch($ch) { + case '*': + $tok = Token::star; + break; + case chr(ord('>')): + $tok = Token::rangle; + break; + case '.': + $tok = Token::dot; + break; + case '#': + $tok = Token::octo; + break; + case '[': + $tok = Token::lsquare; + break; + case ']': + $tok = Token::rsquare; + break; + case ':': + $tok = Token::colon; + break; + case '(': + $tok = Token::lparen; + break; + case ')': + $tok = Token::rparen; + break; + case '+': + $tok = Token::plus; + break; + case '~': + $tok = Token::tilde; + break; + case '=': + $tok = Token::eq; + break; + case '|': + $tok = Token::pipe; + break; + case ',': + $tok = Token::comma; + break; + case chr(34): + $tok = Token::quote; + break; + case "'": + $tok = Token::squote; + break; + case '\\': + $tok = Token::bslash; + break; + case '^': + $tok = Token::carat; + break; + case '$': + $tok = Token::dollar; + break; + case '@': + $tok = Token::at; + break; + } + + + // Catch all characters that are legal within strings. + if ($tok == -1) { + // TODO: This should be UTF-8 compatible, but PHP doesn't + // have a native UTF-8 string. Should we use external + // mbstring library? 
+ + $ord = ord($ch); + // Characters in this pool are legal for use inside of + // certain strings. Extended ASCII is used here, though I + // Don't know if these are really legal. + if (($ord >= 32 && $ord <= 126) || ($ord >= 128 && $ord <= 255)) { + $tok = Token::stringLegal; + } + else { + throw new ParseException('Illegal character found in stream: ' . $ord); + } + } + + $this->token = $tok; + return $tok; + } + + /** + * Get a name string from the input stream. + * A name string must be composed of + * only characters defined in Token:char: -_a-zA-Z0-9 + */ + public function getNameString() { + $buf = ''; + while ($this->token === Token::char) { + $buf .= $this->value; + $this->nextToken(); + //print '_'; + } + return $buf; + } + + /** + * This gets a string with any legal 'string' characters. + * See CSS Selectors specification, section 11, for the + * definition of string. + * + * This will check for string1, string2, and the case where a + * string is unquoted (Oddly absent from the "official" grammar, + * though such strings are present as examples in the spec.) + * + * Note: + * Though the grammar supplied by CSS 3 Selectors section 11 does not + * address the contents of a pseudo-class value, the spec itself indicates + * that a pseudo-class value is a "value between parenthesis" [6.6]. The + * examples given use URLs among other things, making them closer to the + * definition of 'string' than to 'name'. So we handle them here as strings. + */ + public function getQuotedString() { + if ($this->token == Token::quote || $this->token == Token::squote || $this->token == Token::lparen) { + $end = ($this->token == Token::lparen) ? Token::rparen : $this->token; + $buf = ''; + $escape = FALSE; + + $this->nextToken(); // Skip the opening quote/paren + + // The second conjunct is probably not necessary. 
+ while ($this->token !== FALSE && $this->token > -1) { + //print "Char: $this->value \n"; + if ($this->token == Token::bslash && !$escape) { + // XXX: The backslash (\) is removed here. + // Turn on escaping. + //$buf .= $this->value; + $escape = TRUE; + } + elseif ($escape) { + // Turn off escaping + $buf .= $this->value; + $escape = FALSE; + } + elseif ($this->token === $end) { + // At end of string; skip token and break. + $this->nextToken(); + break; + } + else { + // Append char. + $buf .= $this->value; + } + $this->nextToken(); + } + return $buf; + } + } + + /** + * Get the contents inside of a pseudoClass(). + * + * Must be called while the current token is an opening quote, single + * quote or left parenthesis; otherwise nothing is returned. Reads up to + * the matching closing token, dropping backslashes used as escapes and + * keeping nested parenthesised groups (parentheses included) in the result. + * On return the scanner is positioned on the token after the closing one. + * + * @return string + * The text between the delimiters. + */ + public function getPseudoClassString() { + if ($this->token == Token::quote || $this->token == Token::squote || $this->token == Token::lparen) { + $end = ($this->token == Token::lparen) ? Token::rparen : $this->token; + $buf = ''; + $escape = FALSE; + + $this->nextToken(); // Skip the opening quote/paren + + // The second conjunct is probably not necessary. + while ($this->token !== FALSE && $this->token > -1) { + //print "Char: $this->value \n"; + if ($this->token == Token::bslash && !$escape) { + // XXX: The backslash (\) is removed here. + // Turn on escaping. + //$buf .= $this->value; + $escape = TRUE; + } + elseif ($escape) { + // Turn off escaping + $buf .= $this->value; + $escape = FALSE; + } + // Allow nested pseudoclasses. + elseif ($this->token == Token::lparen) { + $buf .= "("; + $buf .= $this->getPseudoClassString(); + $buf .= ")"; + // The recursive call already advanced past the inner ')'. Re-examine + // the current token instead of letting the nextToken() at the bottom + // of the loop skip it (it may be our own closing token). + continue; + } + elseif ($this->token === $end) { + // At end of string; skip token and break. + $this->nextToken(); + break; + } + else { + // Append char. + $buf .= $this->value; + } + $this->nextToken(); + } + return $buf; + } + } + + /** + * Get a string from the input stream. + * This is a convenience function for getting a string of + * characters that are either alphanumber or whitespace. See + * the Token::white and Token::char definitions. + * + * @deprecated This is not used anywhere in QueryPath. 
+ *//* + public function getStringPlusWhitespace() { + $buf = ''; + if($this->token === FALSE) {return '';} + while ($this->token === Token::char || $this->token == Token::white) { + $buf .= $this->value; + $this->nextToken(); + } + return $buf; + }*/ + +} diff --git a/lib/querypath/src/QueryPath/CSS/Selector.php b/lib/querypath/src/QueryPath/CSS/Selector.php new file mode 100644 index 0000000..4b538bd --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/Selector.php @@ -0,0 +1,144 @@ +b>c', the iterator will produce: + * - c + * - b + * - a + * It is assumed, therefore, that any suitable querying engine will + * traverse from the bottom (`c`) back up. + * + * @b Usage + * + * This class is an event handler. It can be plugged into an Parser and + * receive the events the Parser generates. + * + * This class is also an iterator. Once the parser has completed, the + * captured selectors can be iterated over. + * + * @code + * parse(); + * + * foreach ($selectorList as $simpleSelector) { + * // Do something with the SimpleSelector. + * print_r($simpleSelector); + * } + * ?> + * @endode + * + * + * @since QueryPath 3.0.0 + */ +class Selector implements EventHandler, \IteratorAggregate, \Countable { + protected $selectors = array(); + protected $currSelector; + protected $selectorGroups = array(); + protected $groupIndex = 0; + + public function __construct() { + $this->currSelector = new SimpleSelector(); + + $this->selectors[$this->groupIndex][] = $this->currSelector; + } + + public function getIterator() { + return new \ArrayIterator($this->selectors); + } + + /** + * Get the array of SimpleSelector objects. + * + * Normally, one iterates over a Selector. However, if it is + * necessary to get the selector array and manipulate it, this + * method can be used. 
+ */ + public function toArray() { + return $this->selectors; + } + + public function count() { + return count($this->selectors); + } + + public function elementID($id) { + $this->currSelector->id = $id; + } + public function element($name) { + $this->currSelector->element = $name; + } + public function elementNS($name, $namespace = NULL) { + $this->currSelector->ns = $namespace; + $this->currSelector->element = $name; + } + public function anyElement() { + $this->currSelector->element = '*'; + } + public function anyElementInNS($ns) { + $this->currSelector->ns = $ns; + $this->currSelector->element = '*'; + } + public function elementClass($name) { + $this->currSelector->classes[] = $name; + } + public function attribute($name, $value = NULL, $operation = EventHandler::isExactly) { + $this->currSelector->attributes[] = array( + 'name' => $name, + 'value' => $value, + 'op' => $operation, + ); + } + public function attributeNS($name, $ns, $value = NULL, $operation = EventHandler::isExactly) { + $this->currSelector->attributes[] = array( + 'name' => $name, + 'value' => $value, + 'op' => $operation, + 'ns' => $ns, + ); + } + public function pseudoClass($name, $value = NULL) { + $this->currSelector->pseudoClasses[] = array('name' => $name, 'value' => $value); + } + public function pseudoElement($name) { + $this->currSelector->pseudoElements[] = $name; + } + public function combinator($combinatorName) { + $this->currSelector->combinator = $combinatorName; + $this->currSelector = new SimpleSelector(); + array_unshift($this->selectors[$this->groupIndex], $this->currSelector); + //$this->selectors[]= $this->currSelector; + } + public function directDescendant() { + $this->combinator(SimpleSelector::directDescendant); + } + public function adjacent() { + $this->combinator(SimpleSelector::adjacent); + } + public function anotherSelector() { + $this->groupIndex++; + $this->currSelector = new SimpleSelector(); + $this->selectors[$this->groupIndex] = array($this->currSelector); 
+ } + public function sibling() { + $this->combinator(SimpleSelector::sibling); + } + public function anyDescendant() { + $this->combinator(SimpleSelector::anyDescendant); + } +} diff --git a/lib/querypath/src/QueryPath/CSS/SimpleSelector.php b/lib/querypath/src/QueryPath/CSS/SimpleSelector.php new file mode 100644 index 0000000..3fcc796 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/SimpleSelector.php @@ -0,0 +1,138 @@ +'; + case self::sibling: + return '~'; + case self::anotherSelector: + return ', '; + case self::anyDescendant: + return ' '; + } + } + + public function __construct() { + } + + /** + * Whether any component of this simple selector has been set. + * + * @return boolean + * TRUE if at least one of element, id, classes, combinator, attributes, + * pseudo-classes or pseudo-elements is non-empty; FALSE otherwise. + */ + public function notEmpty() { + // These must be read from $this: bare local names are always undefined + // here, which made the check return FALSE unconditionally. + return !empty($this->element) + || !empty($this->id) + || !empty($this->classes) + || !empty($this->combinator) + || !empty($this->attributes) + || !empty($this->pseudoClasses) + || !empty($this->pseudoElements) + ; + } + + public function __tostring() { + $buffer = array(); + try { + + if (!empty($this->ns)) { + $buffer[] = $this->ns; $buffer[] = '|'; + } + if (!empty($this->element)) $buffer[] = $this->element; + if (!empty($this->id)) $buffer[] = '#' . $this->id; + if (!empty($this->attributes)) { + foreach ($this->attributes as $attr) { + $buffer[] = '['; + if(!empty($attr['ns'])) $buffer[] = $attr['ns'] . '|'; + $buffer[] = $attr['name']; + if (!empty($attr['value'])) { + $buffer[] = self::attributeOperator($attr['op']); + $buffer[] = $attr['value']; + } + $buffer[] = ']'; + } + } + if (!empty($this->pseudoClasses)) { + foreach ($this->pseudoClasses as $ps) { + $buffer[] = ':' . $ps['name']; + if (isset($ps['value'])) { + $buffer[] = '(' . $ps['value'] . ')'; + } + } + } + foreach ($this->pseudoElements as $pe) { + $buffer[] = '::' . 
$pe; + } + + if (!empty($this->combinator)) { + $buffer[] = self::combinatorOperator($this->combinator); + } + + } + catch (\Exception $e) { + return $e->getMessage(); + } + + return implode('', $buffer); + } + +} diff --git a/lib/querypath/src/QueryPath/CSS/Token.php b/lib/querypath/src/QueryPath/CSS/Token.php new file mode 100644 index 0000000..3c31ef4 --- /dev/null +++ b/lib/querypath/src/QueryPath/CSS/Token.php @@ -0,0 +1,60 @@ + NULL, + 'omit_xml_declaration' => FALSE, + 'replace_entities' => FALSE, + 'exception_level' => 771, // E_ERROR | E_USER_ERROR | E_USER_WARNING | E_WARNING + 'ignore_parser_warnings' => FALSE, + 'escape_xhtml_js_css_sections' => self::JS_CSS_ESCAPE_CDATA_CCOMMENT, + ); + /** + * The array of matches. + */ + protected $matches = array(); + /** + * The last array of matches. + */ + protected $last = array(); // Last set of matches. + private $ext = array(); // Extensions array. + + /** + * The number of current matches. + * + * @see count() + */ + public $length = 0; + + /** + * Constructor. + * + * Typically, a new DOMQuery is created by QueryPath::with(), QueryPath::withHTML(), + * qp(), or htmlqp(). + * + * @param mixed $document + * A document-like object. + * @param string $string + * A CSS 3 Selector + * @param array $options + * An associative array of options. + * @see qp() + */ + public function __construct($document = NULL, $string = NULL, $options = array()) { + $string = trim($string); + $this->options = $options + Options::get() + $this->options; + + $parser_flags = isset($options['parser_flags']) ? $options['parser_flags'] : self::DEFAULT_PARSER_FLAGS; + if (!empty($this->options['ignore_parser_warnings'])) { + // Don't convert parser warnings into exceptions. + $this->errTypes = 257; //E_ERROR | E_USER_ERROR; + } + elseif (isset($this->options['exception_level'])) { + // Set the error level at which exceptions will be thrown. 
By default, + // QueryPath will throw exceptions for + // E_ERROR | E_USER_ERROR | E_WARNING | E_USER_WARNING. + $this->errTypes = $this->options['exception_level']; + } + + // Empty: Just create an empty QP. + if (empty($document)) { + $this->document = isset($this->options['encoding']) ? new \DOMDocument('1.0', $this->options['encoding']) : new \DOMDocument(); + $this->setMatches(new \SplObjectStorage()); + } + // Figure out if document is DOM, HTML/XML, or a filename + elseif (is_object($document)) { + + // This is the most frequent object type. + if ($document instanceof \SplObjectStorage) { + $this->matches = $document; + if ($document->count() != 0) { + $first = $this->getFirstMatch(); + if (!empty($first->ownerDocument)) { + $this->document = $first->ownerDocument; + } + } + } + elseif ($document instanceof DOMQuery) { + //$this->matches = $document->get(NULL, TRUE); + $this->setMatches($document->get(NULL, TRUE)); + if ($this->matches->count() > 0) + $this->document = $this->getFirstMatch()->ownerDocument; + } + elseif ($document instanceof \DOMDocument) { + $this->document = $document; + //$this->matches = $this->matches($document->documentElement); + $this->setMatches($document->documentElement); + } + elseif ($document instanceof \DOMNode) { + $this->document = $document->ownerDocument; + //$this->matches = array($document); + $this->setMatches($document); + } + elseif ($document instanceof \Masterminds\HTML5) { + $this->document = $document; + $this->setMatches($document->documentElement); + } + elseif ($document instanceof \SimpleXMLElement) { + $import = dom_import_simplexml($document); + $this->document = $import->ownerDocument; + //$this->matches = array($import); + $this->setMatches($import); + } + else { + throw new \QueryPath\Exception('Unsupported class type: ' . 
get_class($document)); + } + } + elseif (is_array($document)) { + //trigger_error('Detected deprecated array support', E_USER_NOTICE); + if (!empty($document) && $document[0] instanceof \DOMNode) { + $found = new \SplObjectStorage(); + foreach ($document as $item) $found->attach($item); + //$this->matches = $found; + $this->setMatches($found); + $this->document = $this->getFirstMatch()->ownerDocument; + } + } + elseif ($this->isXMLish($document)) { + // $document is a string with XML + $this->document = $this->parseXMLString($document); + $this->setMatches($this->document->documentElement); + } + else { + + // $document is a filename + $context = empty($options['context']) ? NULL : $options['context']; + $this->document = $this->parseXMLFile($document, $parser_flags, $context); + $this->setMatches($this->document->documentElement); + } + + // Globally set the output option. + if (isset($this->options['format_output']) && $this->options['format_output'] == FALSE) { + $this->document->formatOutput = FALSE; + } + else { + $this->document->formatOutput = TRUE; + } + + // Do a find if the second param was set. + if (isset($string) && strlen($string) > 0) { + // We don't issue a find because that creates a new DOMQuery. + //$this->find($string); + + $query = new \QueryPath\CSS\DOMTraverser($this->matches); + $query->find($string); + $this->setMatches($query->matches()); + } + } + + + /** + * Get the effective options for the current DOMQuery object. + * + * This returns an associative array of all of the options as set + * for the current DOMQuery object. This includes default options, + * options directly passed in via {@link qp()} or the constructor, + * an options set in the QueryPath::Options object. + * + * The order of merging options is this: + * - Options passed in using qp() are highest priority, and will + * override other options. + * - Options set with QueryPath::Options will override default options, + * but can be overridden by options passed into qp(). 
+   * - Default options will be used when no overrides are present.
+   *
+   * This function will return the options currently used, with the above option
+   * overriding having been calculated already.
+   *
+   * @return array
+   *  An associative array of options, calculated from defaults and overridden
+   *  options.
+   * @see qp()
+   * @see QueryPath::Options::set()
+   * @see QueryPath::Options::merge()
+   * @since 2.0
+   */
+  public function getOptions() {
+    return $this->options;
+  }
+
+  /**
+   * Select the root element of the document.
+   *
+   * The root (document) element of the current document is handed to inst()
+   * together with the optional selector. For practical purposes, this is the
+   * same as:
+   * @code
+   * qp($someDoc)->find(':root');
+   * @endcode
+   * However, since it doesn't invoke a parser, it has less overhead. It also
+   * works in cases where the QueryPath has been reduced to zero elements (a
+   * case that is not handled by find(':root') because there is no element
+   * whose root can be found).
+   *
+   * @param string $selector
+   *  A selector. If this is supplied, QueryPath will navigate to the
+   *  document root and then run the query. (Added in QueryPath 2.0 Beta 2)
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery object, wrapping the root element (document element)
+   *  for the current document.
+   */
+  public function top($selector = NULL) {
+    $root = $this->document->documentElement;
+    return $this->inst($root, $selector, $this->options);
+  }
+
+  /**
+   * Given a CSS Selector, find matching items.
+   *
+   * @param string $selector
+   *   CSS 3 Selector
+   * @return \QueryPath\DOMQuery
+   * @see filter()
+   * @see is()
+   * @todo If a find() returns zero matches, then a subsequent find() will
+   *  also return zero matches, even if that find has a selector like :root.
+   *  The reason for this is that the {@link QueryPathEventHandler} does
+   *  not set the root of the document tree if it cannot find any elements
+   *  from which to determine what the root is. The workaround is to use
+   *  {@link top()} to select the root element again.
+   */
+  public function find($selector) {
+    $query = new \QueryPath\CSS\DOMTraverser($this->matches);
+    $query->find($selector);
+    return $this->inst($query->matches(), NULL, $this->options);
+  }
+
+  /**
+   * Given a CSS Selector, find matching items and keep them in this object.
+   *
+   * Runs the same traversal as {@link find()}, but instead of handing the
+   * result to inst() it replaces the current matches through setMatches()
+   * and returns this very object.
+   *
+   * @param string $selector
+   *   CSS 3 Selector
+   * @return \QueryPath\DOMQuery
+   *   This object, now wrapping whatever the selector matched.
+   * @see find()
+   */
+  public function findInPlace($selector) {
+    $query = new \QueryPath\CSS\DOMTraverser($this->matches);
+    $query->find($selector);
+    $this->setMatches($query->matches());
+    return $this;
+  }
+
+  /**
+   * Execute an XPath query and wrap the results in a DOMQuery.
+   *
+   * Most methods in this class support CSS 3 Selectors. Sometimes, though,
+   * XPath provides a finer-grained query language. Use this to execute
+   * XPath queries.
+   *
+   * The query is evaluated once per current match, with that match as the
+   * context node; all resulting nodes are collected into one set.
+   *
+   * Beware, though. DOMQuery works best on DOM Elements, but an XPath
+   * query can return other nodes, strings, and values. These may not work with
+   * other QueryPath functions (though you will be able to access the
+   * values with {@link get()}).
+   *
+   * @param string $query
+   *   An XPath query.
+   * @param array $options
+   *   Currently supported options are:
+   *     - 'namespace_prefix': An XML namespace prefix to be used as the default. Used
+   *       in conjunction with 'namespace_uri'
+   *     - 'namespace_uri': The URI to be used as the default namespace URI. Used
+   *       with 'namespace_prefix'
+   * @return \QueryPath\DOMQuery
+   *   A DOMQuery object wrapping the results of the query.
+   * @see find()
+   * @author M Butcher
+   * @author Xavier Prud'homme
+   */
+  public function xpath($query, $options = array()) {
+    $xpath = new \DOMXPath($this->document);
+
+    // Register a default namespace (both halves are required).
+    if (!empty($options['namespace_prefix']) && !empty($options['namespace_uri'])) {
+      $xpath->registerNamespace($options['namespace_prefix'], $options['namespace_uri']);
+    }
+
+    $found = new \SplObjectStorage();
+    foreach ($this->matches as $item) {
+      $nl = $xpath->query($query, $item);
+
+      // DOMXPath::query() answers FALSE -- not an empty list -- when the
+      // expression is malformed or the context node is invalid. Skip that
+      // context instead of reading ->length on a boolean.
+      if ($nl === FALSE) {
+        continue;
+      }
+      foreach ($nl as $node) {
+        $found->attach($node);
+      }
+    }
+    return $this->inst($found, NULL, $this->options);
+  }
+
+  /**
+   * Get the number of elements currently wrapped by this object.
+   *
+   * Note that there is no length property on this object.
+   *
+   * @return int
+   *  Number of items in the object.
+   * @deprecated QueryPath now implements Countable, so use count().
+   */
+  public function size() {
+    return $this->matches->count();
+  }
+
+  /**
+   * Get the number of elements currently wrapped by this object.
+   *
+   * Since DOMQuery is Countable, the PHP count() function can also
+   * be used on a DOMQuery.
+   *
+   * @code
+   * <?php
+   *  count(qp($xml, 'div'));
+   * ?>
+   * @endcode
+   *
+   * @return int
+   *  The number of matches in the DOMQuery.
+   */
+  public function count() {
+    return $this->matches->count();
+  }
+
+  /**
+   * Get one or all elements from this object.
+   *
+   * When called with no parameters, this returns all objects wrapped by
+   * the DOMQuery. Typically, these are DOMElement objects (unless you have
+   * used map(), xpath(), or other methods that can select
+   * non-elements).
+   *
+   * When called with an index, it will return the item in the DOMQuery with
+   * that index number.
+   *
+   * Calling this method does not change the DOMQuery (e.g. it is
+   * non-destructive).
+   *
+   * You can use qp()->get() to iterate over all elements matched. You can
+   * also iterate over qp() itself (DOMQuery implementations must be Traversable).
+   * In the latter case, though, each item
+   * will be wrapped in a DOMQuery object. To learn more about iterating
+   * in QueryPath, see {@link examples/techniques.php}.
+   *
+   * @param int $index
+   *   If specified, then only this index value will be returned. If this
+   *   index is out of bounds, a NULL will be returned.
+   * @param boolean $asObject
+   *   If this is TRUE, an SplObjectStorage object will be returned
+   *   instead of an array. This is the preferred method for extensions to use.
+   * @return mixed
+   *   If an index is passed, one element will be returned. If no index is
+   *   present, an array of all matches will be returned.
+   * @see eq()
+   * @see SplObjectStorage
+   */
+  public function get($index = NULL, $asObject = FALSE) {
+    if (isset($index)) {
+      // A negative index is out of bounds just like one past the end, so it
+      // gets the documented NULL rather than being passed along.
+      $inBounds = $index >= 0 && $this->size() > $index;
+      return $inBounds ? $this->getNthMatch($index) : NULL;
+    }
+
+    if ($asObject) {
+      return $this->matches;
+    }
+
+    // Retain support for legacy callers: copy the matches into a plain list.
+    $matches = array();
+    foreach ($this->matches as $m) {
+      $matches[] = $m;
+    }
+    return $matches;
+  }
+
+  /**
+   * Get the namespace of the current element.
+   *
+   * If QP is currently pointed to a list of elements, this will get the
+   * namespace of the first element.
+   *
+   * @return string|null
+   *   The namespaceURI property of the first match, or NULL when there are
+   *   no matches.
+   */
+  public function ns() {
+    $first = $this->get(0);
+    // get(0) is NULL for an empty set; do not dereference it.
+    if (!is_object($first)) {
+      return NULL;
+    }
+    return $first->namespaceURI;
+  }
+
+  /**
+   * Get the DOMDocument that we currently work with.
+   *
+   * This returns the current DOMDocument. Any changes made to this document will be
+   * accessible to DOMQuery, as both will share access to the same object.
+   *
+   * @return \DOMDocument
+   */
+  public function document() {
+    return $this->document;
+  }
+
+  /**
+   * On an XML document, load all XIncludes.
+   *
+   * @return \QueryPath\DOMQuery
+   */
+  public function xinclude() {
+    $this->document->xinclude();
+    return $this;
+  }
+
+  /**
+   * Get all current elements wrapped in an array.
+   * Compatibility function for jQuery 1.4, but identical to calling {@link get()}
+   * with no parameters.
+   *
+   * @return array
+   *  An array of DOMNodes (typically DOMElements).
+   */
+  public function toArray() {
+    return $this->get();
+  }
+  /**
+   * Get/set an attribute.
+   * - If no parameters are specified, this returns an associative array of all
+   *   name/value pairs.
+   * - If both $name and $value are set, then this will set the attribute name/value
+   *   pair for all items in this object.
+   * - If $name is set, and is an array, then
+   *   all attributes in the array will be set for all items in this object.
+   * - If $name is a string and is set, then the attribute value will be returned.
+   *
+   * When an attribute value is retrieved, only the attribute value of the FIRST
+   * match is returned.
+   *
+   * @param mixed $name
+   *   The name of the attribute or an associative array of name/value pairs.
+   * @param string $value
+   *   A value (used only when setting an individual property).
+   * @return mixed
+   *   If this was a setter request, return the DOMQuery object. If this was
+   *   an access request (getter), return the string value.
+   * @see removeAttr()
+   * @see tag()
+   * @see hasAttr()
+   * @see hasClass()
+   */
+  public function attr($name = NULL, $value = NULL) {
+
+    // Default case: Return all attributes as an assoc array.
+    if (is_null($name)) {
+      if ($this->matches->count() == 0) return NULL;
+      $ele = $this->getFirstMatch();
+      $buffer = array();
+
+      // This does not appear to be part of the DOM
+      // spec. Nor is it documented. But it works.
+      foreach ($ele->attributes as $attrName => $attrNode) {
+        $buffer[$attrName] = $attrNode->value;
+      }
+      return $buffer;
+    }
+
+    // multi-setter
+    if (is_array($name)) {
+      foreach ($name as $k => $v) {
+        foreach ($this->matches as $m) $m->setAttribute($k, $v);
+      }
+      return $this;
+    }
+    // setter
+    if (isset($value)) {
+      foreach ($this->matches as $m) $m->setAttribute($name, $value);
+      return $this;
+    }
+
+    //getter
+    if ($this->matches->count() == 0) return NULL;
+
+    // Special node type handler. Strict comparison: with ==, a numeric
+    // $name such as 0 equals 'nodeType' on PHP versions before 8.
+    if ($name === 'nodeType') {
+      return $this->getFirstMatch()->nodeType;
+    }
+
+    // Always return first match's attr.
+    return $this->getFirstMatch()->getAttribute($name);
+  }
+  /**
+   * Check to see if the given attribute is present.
+   *
+   * This returns TRUE if all selected items have the attribute, or
+   * FALSE if at least one item does not have the attribute.
+   *
+   * @param string $attrName
+   *  The attribute name.
+   * @return boolean
+   *  TRUE if all matches have the attribute, FALSE otherwise.
+   * @since 2.0
+   * @see attr()
+   * @see hasClass()
+   */
+  public function hasAttr($attrName) {
+    foreach ($this->matches as $match) {
+      if (!$match->hasAttribute($attrName)) return FALSE;
+    }
+    return TRUE;
+  }
+
+  /**
+   * Set/get a CSS value for the current element(s).
+   * This sets the CSS value for each element in the DOMQuery object.
+   * It does this by setting (or getting) the style attribute (without a namespace).
+   *
+   * For example, consider this code:
+   * @code
+   * <?php
+   * qp('<div/>')->css('background-color','red')->html();
+   * ?>
+   * @endcode
+   * This will return the following HTML:
+   * @code
+   * <div style="background-color: red;"/>
+   * @endcode
+   *
+   * If no parameters are passed into this function, then the current style
+   * element will be returned unparsed. Example:
+   * @code
+   * <?php
+   * qp('<div/>')->css('background-color','red')->css();
+   * ?>
+   * @endcode
+   * This will return the following:
+   * @code
+   * background-color: red;
+   * @endcode
+   *
+   * As of QueryPath 2.1, existing style attributes will be merged with new attributes.
+   * (In previous versions of QueryPath, a call to css() overwrite the existing style
+   * values). Note that the existing declarations of ALL matched elements are
+   * pooled, and the pooled result is written back to every matched element.
+   *
+   * @param mixed $name
+   *  If this is a string, it will be used as a CSS name. If it is an array,
+   *  this will assume it is an array of name/value pairs of CSS rules. It will
+   *  apply all rules to all elements in the set.
+   * @param string $value
+   *  The value to set. This is only set if $name is a string.
+   * @return \QueryPath\DOMQuery|string
+   *  This object when setting; the raw style attribute of the first match
+   *  when called without a name.
+   */
+  public function css($name = NULL, $value = '') {
+    if (empty($name)) {
+      return $this->attr('style');
+    }
+
+    // Get any existing CSS.
+    $css = array();
+    foreach ($this->matches as $match) {
+      $style = $match->getAttribute('style');
+      if (empty($style)) {
+        continue;
+      }
+
+      // XXX: Splitting on ';' is naive -- a semicolon inside url(...) or a
+      // quoted string will be treated as a separator.
+      foreach (explode(';', $style) as $item) {
+        $item = trim($item);
+
+        // Skip empty declarations and fragments that have no "name: value"
+        // shape; destructuring those would hit an undefined offset.
+        if ($item === '' || strpos($item, ':') === FALSE) continue;
+
+        list($css_att, $css_val) = explode(':', $item, 2);
+
+        // Normalise the property name so "color : red" and "color: red"
+        // land on the same key instead of producing duplicates.
+        $css_att = trim($css_att);
+        if ($css_att === '') continue;
+
+        $css[$css_att] = trim($css_val);
+      }
+    }
+
+    if (is_array($name)) {
+      // Use array_merge instead of + to preserve order.
+      $css = array_merge($css, $name);
+    }
+    else {
+      $css[$name] = $value;
+    }
+
+    // Collapse CSS into a string.
+    $format = '%s: %s;';
+    $css_string = '';
+    foreach ($css as $n => $v) {
+      $css_string .= sprintf($format, $n, trim($v));
+    }
+
+    $this->attr('style', $css_string);
+    return $this;
+  }
+
+  /**
+   * Insert or retrieve a Data URL.
+   *
+   * When called with just $attr, it will fetch the result, attempt to decode it, and
+   * return an array with the MIME type and the application data.
+   *
+   * When called with both $attr and $data, it will inject the data into all selected elements
+   * So @code$qp->dataURL('src', file_get_contents('my.png'), 'image/png')@endcode will inject
+   * the given PNG image into the selected elements.
+   *
+   * The current implementation only knows how to encode and decode Base 64 data.
+   *
+   * Note that this is known *not* to work on IE 6, but should render fine in other browsers.
+   *
+   * @param string $attr
+   *    The name of the attribute.
+   * @param mixed $data
+   *    The contents to inject as the data. The value can be any one of the following:
+   *    - A URL: If this is given, then the subsystem will read the content from that URL. THIS
+   *      MUST BE A FULL URL, not a relative path.
+   *    - A string of data: If this is given, then the subsystem will encode the string.
+   *    - A stream or file handle: If this is given, the stream's contents will be encoded
+   *      and inserted as data.
+   *    (Note that we make the assumption here that you would never want to set data to be
+   *    a URL. If this is an incorrect assumption, file a bug.)
+   * @param string $mime
+   *    The MIME type of the document.
+   * @param resource $context
+   *    A valid context. Use this only if you need to pass a stream context. This is only necessary
+   *    if $data is a URL. (See {@link stream_context_create()}).
+   * @return \QueryPath\DOMQuery|array|null
+   *    If this is called as a setter, this will return a DOMQuery object. Otherwise, it
+   *    will attempt to fetch data out of the attribute and return an array with the
+   *    keys 'mime' and 'data', or NULL when the attribute does not hold a
+   *    base64 data URL.
+   * @see http://en.wikipedia.org/wiki/Data:_URL
+   * @see attr()
+   * @since 2.1
+   */
+  public function dataURL($attr, $data = NULL, $mime = 'application/octet-stream', $context = NULL) {
+    // Setter: encode and store on every match.
+    if (!is_null($data)) {
+      $attVal = \QueryPath::encodeDataURL($data, $mime, $context);
+      return $this->attr($attr, $attVal);
+    }
+
+    // Getter: attempt to fetch the data.
+    $data = $this->attr($attr);
+    if (empty($data) || is_array($data) || strpos($data, 'data:') !== 0) {
+      return NULL;
+    }
+
+    // Groups 1 and 2 are the MIME type and subtype, group 3 is the
+    // base64-encoded payload. Type names may contain more than letters and
+    // digits ("octet-stream", "svg+xml", "vnd.ms-excel"); a purely
+    // alphanumeric pattern could not even read back the default $mime that
+    // the setter writes. The /s flag lets the payload span line breaks.
+    $token = '[a-zA-Z0-9!#$&^_.+-]+';
+    $regex = '/^data:(' . $token . ')\/(' . $token . ');base64,(.*)$/s';
+    $matches = array();
+    if (!preg_match($regex, $data, $matches)) {
+      return NULL;
+    }
+
+    return array(
+      'mime' => $matches[1] . '/' . $matches[2],
+      'data' => base64_decode($matches[3]),
+    );
+  }
+
+  /**
+   * Remove the named attribute from all elements in the current DOMQuery.
+   *
+   * This will remove any attribute with the given name. It will do this on each
+   * item currently wrapped by DOMQuery.
+   *
+   * As is the case in jQuery, this operation is not considered destructive.
+   *
+   * @param string $name
+   *  Name of the parameter to remove.
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery object with the same elements.
+   * @see attr()
+   */
+  public function removeAttr($name) {
+    foreach ($this->matches as $m) {
+      $m->removeAttribute($name);
+    }
+    return $this;
+  }
+  /**
+   * Reduce the matched set to just one.
+   *
+   * This will take a matched set and reduce it to just one item -- the item
+   * at the index specified. This is a destructive operation, and can be undone
+   * with {@link end()}.
+   *
+   * @param $index
+   *  The index of the element to keep. The rest will be
+   *  discarded.
+   * @return \QueryPath\DOMQuery
+   * @see get()
+   * @see is()
+   * @see end()
+   */
+  public function eq($index) {
+    // XXX: Might there be a more efficient way of doing this?
+    return $this->inst($this->getNthMatch($index), NULL, $this->options);
+  }
+  /**
+   * Given a selector, this checks to see if the current set has one or more matches.
+   *
+   * Unlike jQuery's version, this supports full selectors (not just simple ones).
+   *
+   * @param string $selector
+   *   The selector to search for. As of QueryPath 2.1.1, this also supports passing a
+   *   DOMNode object. A Traversable of nodes is accepted too; it matches only
+   *   when it holds exactly the current matches, each once.
+   * @return boolean
+   *   TRUE if one or more elements match. FALSE if no match is found.
+   * @throws \QueryPath\Exception
+   *   Thrown if $selector is an object that is neither a DOMNode nor a
+   *   Traversable.
+   * @see get()
+   * @see eq()
+   */
+  public function is($selector) {
+
+    if (is_object($selector)) {
+      if ($selector instanceof \DOMNode) {
+        return count($this->matches) == 1 && $selector->isSameNode($this->get(0));
+      }
+
+      if ($selector instanceof \Traversable) {
+        // count() only works on Countable objects; a plain iterator or
+        // generator is materialised first so it can be counted and walked.
+        $items = ($selector instanceof \Countable) ? $selector : iterator_to_array($selector, FALSE);
+        if (count($items) != count($this->matches)) {
+          return FALSE;
+        }
+        // Without $seen, there is an edge case here if $selector contains the same object
+        // more than once, but the counts are equal. For example, [a, a, a, a] will
+        // pass an is() on [a, b, c, d]. We use the $seen SPLOS to prevent this.
+        $seen = new \SplObjectStorage();
+        foreach ($items as $item) {
+          if (!$this->matches->contains($item) || $seen->contains($item)) {
+            return FALSE;
+          }
+          $seen->attach($item);
+        }
+        return TRUE;
+      }
+
+      throw new \QueryPath\Exception('Cannot compare an object to a DOMQuery.');
+    }
+
+    // Testing based on Issue #70.
+    return $this->branch($selector)->count() > 0;
+  }
+  /**
+   * Filter a list down to only elements that match the selector.
+   * Use this, for example, to find all elements with a class, or with
+   * certain children.
+   *
+   * @param string $selector
+   *   The selector to use as a filter.
+   * @return \QueryPath\DOMQuery
+   *   The DOMQuery with non-matching items filtered out.
+   * @see filterLambda()
+   * @see filterCallback()
+   * @see map()
+   * @see find()
+   * @see is()
+   */
+  public function filter($selector) {
+
+    $found = new \SplObjectStorage();
+    // Scratch storage holding exactly one candidate at a time.
+    $tmp = new \SplObjectStorage();
+    foreach ($this->matches as $m) {
+      $tmp->attach($m);
+      // Seems like this should be right... but it fails unit
+      // tests. Need to compare to jQuery.
+      // $query = new \QueryPath\CSS\DOMTraverser($tmp, TRUE, $m);
+      $query = new \QueryPath\CSS\DOMTraverser($tmp);
+      $query->find($selector);
+      if (count($query->matches())) {
+        $found->attach($m);
+      }
+      $tmp->detach($m);
+    }
+    return $this->inst($found, NULL, $this->options);
+  }
+
+  /**
+   * Sort the contents of the QueryPath object.
+   *
+   * By default, this does not change the order of the elements in the
+   * DOM. Instead, it just sorts the internal list. However, if TRUE
+   * is passed in as the second parameter then QueryPath will re-order
+   * the DOM, too.
+   *
+   * @attention
+   * DOM re-ordering is done by finding the location of the original first
+   * item in the list, and then placing the sorted list at that location.
+   *
+   * The argument $comparator is a callback, such as a function name or a
+   * closure. The callback receives two DOMNode objects, which you can use
+   * as DOMNodes, or wrap in QueryPath objects.
+   *
+   * A simple callback:
+   * @code
+   * <?php
+   * $comp = function (\DOMNode $a, \DOMNode $b) {
+   *   if ($a->textContent == $b->textContent) {
+   *     return 0;
+   *   }
+   *   return $a->textContent > $b->textContent ? 1 : -1;
+   * };
+   * $qp = QueryPath::with($xml, $selector)->sort($comp);
+   * ?>
+   * @endcode
+   *
+   * The above sorts the matches into lexical order using the text of each node.
+   * If you would prefer to work with QueryPath objects instead of DOMNode
+   * objects, you may prefer something like this:
+   *
+   * @code
+   * <?php
+   * $comp = function (\DOMNode $a, \DOMNode $b) {
+   *   $qpa = qp($a);
+   *   $qpb = qp($b);
+   *
+   *   if ($qpa->text() == $qpb->text()) {
+   *     return 0;
+   *   }
+   *   return $qpa->text()> $qpb->text()? 1 : -1;
+   * };
+   *
+   * $qp = QueryPath::with($xml, $selector)->sort($comp);
+   * ?>
+   * @endcode
+   *
+   * @param callback $comparator
+   *   A callback. This will be called during sorting to compare two DOMNode
+   *   objects.
+   * @param boolean $modifyDOM
+   *   If this is TRUE, the sorted results will be inserted back into
+   *   the DOM at the position of the original first element.
+   * @return \QueryPath\DOMQuery
+   *   This object.
+   */
+  public function sort($comparator, $modifyDOM = FALSE) {
+    // Sort as an array.
+    $list = iterator_to_array($this->matches);
+
+    if (empty($list)) {
+      return $this;
+    }
+
+    // Remember where the unsorted list began; that is the DOM anchor.
+    $oldFirst = $list[0];
+
+    usort($list, $comparator);
+
+    // Copy back into SplObjectStorage.
+    $found = new \SplObjectStorage();
+    foreach ($list as $node) {
+      $found->attach($node);
+    }
+
+    // Do DOM modifications only if necessary.
+    if ($modifyDOM) {
+      // Drop a temporary marker in front of the original first node, move
+      // every node (in sorted order) in front of the marker, then remove it.
+      $placeholder = $oldFirst->ownerDocument->createElement('_PLACEHOLDER_');
+      $placeholder = $oldFirst->parentNode->insertBefore($placeholder, $oldFirst);
+      foreach ($list as $node) {
+        $node = $node->parentNode->removeChild($node);
+        $placeholder->parentNode->insertBefore($node, $placeholder);
+      }
+      $placeholder->parentNode->removeChild($placeholder);
+    }
+
+    return $this->inst($found, NULL, $this->options);
+  }
+  /**
+   * Filter based on a lambda function.
+   *
+   * The function string will be executed as if it were the body of a
+   * function. It is passed two arguments:
+   * - $index: The index of the item.
+   * - $item: The current Element.
+   * If the function returns boolean FALSE, the item will be removed from
+   * the list of elements. Otherwise it will be kept.
+   *
+   * Because the string is a function BODY, it needs its own return
+   * statement and terminating semicolon.
+   *
+   * Example:
+   * @code
+   * qp('li')->filterLambda('return qp($item)->attr("id") == "test";');
+   * @endcode
+   *
+   * The above would filter down the list to only an item whose ID is
+   * 'test'.
+   *
+   * @attention
+   * The string is compiled and executed as PHP code. Never build it from
+   * untrusted input; prefer {@link filterCallback()} with a closure.
+   *
+   * @param string $fn
+   *  Inline lambda function in a string.
+   * @return \QueryPath\DOMQuery
+   * @see filter()
+   * @see map()
+   * @see mapLambda()
+   * @see filterCallback()
+   */
+  public function filterLambda($fn) {
+    // create_function() was deprecated in PHP 7.2 and removed in PHP 8.0.
+    // Where it no longer exists, build the equivalent closure with eval().
+    // The newline before the closing brace keeps a trailing // comment in
+    // $fn from swallowing it.
+    if (function_exists('create_function')) {
+      $function = create_function('$index, $item', $fn);
+    }
+    else {
+      $function = eval('return static function ($index, $item) {' . $fn . "\n};");
+    }
+
+    $found = new \SplObjectStorage();
+    $i = 0;
+    foreach ($this->matches as $item) {
+      if ($function($i++, $item) !== FALSE) $found->attach($item);
+    }
+
+    return $this->inst($found, NULL, $this->options);
+  }
+
+  /**
+   * Use regular expressions to filter based on the text content of matched elements.
+   *
+   * Only items that match the given regular expression will be kept. All others will
+   * be removed.
+   *
+   * The regular expression is run against the text content (the PCDATA) of the
+   * elements. This is a way of filtering elements based on their content.
+   *
+   * Example:
+   * @code
+   *  <?xml version="1.0"?>
+   *  <div>Hello <i>World</i></div>
+   * @endcode
+   *
+   * @code
+   *  <?php
+   *    // This will be 1.
+   *    qp($xml, 'div')->filterPreg('/World/')->size();
+   *  ?>
+   * @endcode
+   *
+   * The return value above will be 1 because the text content of @codeqp($xml, 'div')@endcode is
+   * @codeHello World@endcode.
+   *
+   * Compare this to the behavior of the <em>:contains()</em> CSS3 pseudo-class.
+   *
+   * @param string $regex
+   *  A regular expression.
+   * @return \QueryPath\DOMQuery
+   * @see filter()
+   * @see filterCallback()
+   * @see preg_match()
+   */
+  public function filterPreg($regex) {
+
+    $found = new \SplObjectStorage();
+
+    foreach ($this->matches as $item) {
+      // preg_match() gives 1 on a hit, 0 on a miss and FALSE on error; only
+      // a real hit keeps the item.
+      if (preg_match($regex, $item->textContent) > 0) {
+        $found->attach($item);
+      }
+    }
+    return $this->inst($found, NULL, $this->options);
+  }
+  /**
+   * Filter based on a callback function.
+   *
+   * A callback may be any of the following:
+   *  - a function: 'my_func'.
+   *  - an object/method combo: $obj, 'myMethod'
+   *  - a class/method combo: 'MyClass', 'myMethod'
+   * Note that classes are passed in strings. Objects are not.
+   *
+   * Each callback is passed two arguments:
+   *  - $index: The index position of the object in the array.
+   *  - $item: The item to be operated upon.
+   *
+   * If the callback function returns FALSE, the item will be removed from the
+   * set of matches. Otherwise the item will be considered a match and left alone.
+   *
+   * @param callback $callback.
+   *   A callback either as a string (function) or an array (object, method OR
+   *   classname, method).
+   * @return \QueryPath\DOMQuery
+   *   Query path object augmented according to the function.
+   * @see filter()
+   * @see filterLambda()
+   * @see map()
+   * @see is()
+   * @see find()
+   */
+  public function filterCallback($callback) {
+    if (!is_callable($callback)) {
+      throw new \QueryPath\Exception('The specified callback is not callable.');
+    }
+
+    $found = new \SplObjectStorage();
+    $i = 0;
+    foreach ($this->matches as $item) {
+      // Only a strict FALSE drops the item; NULL, 0 and '' keep it.
+      if (call_user_func($callback, $i++, $item) !== FALSE) $found->attach($item);
+    }
+    return $this->inst($found, NULL, $this->options);
+  }
+  /**
+   * Filter a list to contain only items that do NOT match.
+   *
+   * @param string $selector
+   *  A selector to use as a negation filter. If the filter is matched, the
+   *  element will be removed from the list. Besides a selector string, this
+   *  accepts a DOMElement, an array of nodes, or an SplObjectStorage of
+   *  nodes; matches identical to (or contained in) those are removed.
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery object with matching items filtered out.
+   * @see find()
+   */
+  public function not($selector) {
+    $found = new \SplObjectStorage();
+    if ($selector instanceof \DOMElement) {
+      foreach ($this->matches as $m) {
+        if ($m !== $selector) $found->attach($m);
+      }
+    }
+    elseif (is_array($selector)) {
+      foreach ($this->matches as $m) {
+        if (!in_array($m, $selector, TRUE)) $found->attach($m);
+      }
+    }
+    elseif ($selector instanceof \SplObjectStorage) {
+      // Keep what is NOT in the given storage. (The test used to be
+      // un-negated, which kept exactly the items that should be dropped.)
+      foreach ($this->matches as $m) {
+        if (!$selector->contains($m)) $found->attach($m);
+      }
+    }
+    else {
+      // Fully qualified on purpose: inside the QueryPath namespace a bare
+      // "QueryPath::with" would resolve to \QueryPath\QueryPath.
+      foreach ($this->matches as $m) {
+        if (!\QueryPath::with($m, NULL, $this->options)->is($selector)) $found->attach($m);
+      }
+    }
+    return $this->inst($found, NULL, $this->options);
+  }
+  /**
+   * Get an item's index.
+   *
+   * Given a DOMElement, get the index from the matches. This is the
+   * converse of {@link get()}.
+   *
+   * @param DOMElement $subject
+   *  The item to match.
+   *
+   * @return mixed
+   *  The index as an integer (if found), or boolean FALSE. Since 0 is a
+   *  valid index, you should use strong equality (===) to test..
+   * @see get()
+   * @see is()
+   */
+  public function index($subject) {
+    // Walk the matches in order, counting positions until the very same
+    // object (identity, not equality) turns up.
+    $position = 0;
+    foreach ($this->matches as $candidate) {
+      if ($candidate === $subject) {
+        return $position;
+      }
+      $position++;
+    }
+
+    // Not among the matches.
+    return FALSE;
+  }
+  /**
+   * Run a function on each item in a set.
+   *
+   * The mapping callback can return anything. Whatever it returns will be
+   * stored as a match in the set, though. This means that after a map call,
+   * there is no guarantee that the elements in the set will behave correctly
+   * with other DOMQuery functions.
+   *
+   * Callback rules:
+   * - If the callback returns NULL, the item will be removed from the array.
+   * - If the callback returns an array, the entire array will be stored in
+   *   the results.
+   * - If the callback returns anything else, it will be appended to the array
+   *   of matches.
+   *
+   * @param callback $callback
+   *  The function or callback to use. The callback will be passed two params:
+   *  - $index: The index position in the list of items wrapped by this object.
+   *  - $item: The current item.
+   *
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery object wrapping a list of whatever values were returned
+   *  by each run of the callback.
+   *
+   * @see DOMQuery::get()
+   * @see filter()
+   * @see find()
+   */
+  public function map($callback) {
+    if (!is_callable($callback)) {
+      throw new \QueryPath\Exception('Callback is not callable.');
+    }
+
+    $found = new \SplObjectStorage();
+    $i = 0;
+    foreach ($this->matches as $item) {
+      $c = call_user_func($callback, $i, $item);
+      ++$i;
+
+      // NULL means "drop this item".
+      if (!isset($c)) {
+        continue;
+      }
+
+      // Arrays and traversable results contribute each of their elements;
+      // anything else is a single result. (\Traversable is the real
+      // interface; "\Iterable" names no class, so that test never matched.)
+      $results = (is_array($c) || $c instanceof \Traversable) ? $c : array($c);
+
+      foreach ($results as $retval) {
+        // SplObjectStorage can only hold objects, so scalar results are
+        // boxed in a stdClass that exposes them as ->textContent.
+        if (!is_object($retval)) {
+          $tmp = new \stdClass();
+          $tmp->textContent = $retval;
+          $retval = $tmp;
+        }
+        $found->attach($retval);
+      }
+    }
+    return $this->inst($found, NULL, $this->options);
+  }
+  /**
+   * Narrow the items in this object down to only a slice of the starting items.
+   *
+   * @param integer $start
+   *  Where in the list of matches to begin the slice.
+   * @param integer $length
+   *  The number of items to include in the slice. If nothing is specified
+   *  (or 0 is given), then all remaining matches (from $start onward) will
+   *  be included in the sliced list.
+   * @return \QueryPath\DOMQuery
+   * @see array_slice()
+   */
+  public function slice($start, $length = 0) {
+    $end = $length;
+    $found = new \SplObjectStorage();
+    if ($start >= $this->size()) {
+      return $this->inst($found, NULL, $this->options);
+    }
+
+    // $i is the position in the full list, $j the number already taken.
+    $i = $j = 0;
+    foreach ($this->matches as $m) {
+      if ($i >= $start) {
+        if ($end > 0 && $j >= $end) {
+          break;
+        }
+        $found->attach($m);
+        ++$j;
+      }
+      ++$i;
+    }
+
+    return $this->inst($found, NULL, $this->options);
+  }
+  /**
+   * Run a callback on each item in the list of items.
+   *
+   * Rules of the callback:
+   * - A callback is passed two variables: $index and $item. (There is no
+   *   special treatment of $this, as there is in jQuery.)
+   *   - You will want to pass $item by reference if it is not an
+   *     object (DOMNodes are all objects).
+   * - A callback that returns FALSE will stop execution of the each() loop. This
+   *   works like break in a standard loop.
+   * - A TRUE return value from the callback is analogous to a continue statement.
+   * - All other return values are ignored.
+   *
+   * @param callback $callback
+   *  The callback to run.
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery.
+   * @see eachLambda()
+   * @see filter()
+   * @see map()
+   */
+  public function each($callback) {
+    if (!is_callable($callback)) {
+      throw new \QueryPath\Exception('Callback is not callable.');
+    }
+
+    $i = 0;
+    foreach ($this->matches as $item) {
+      // A strict FALSE from the callback works like "break".
+      if (call_user_func($callback, $i, $item) === FALSE) return $this;
+      ++$i;
+    }
+    return $this;
+  }
+  /**
+   * An each() iterator that takes a lambda function.
+   *
+   * @deprecated
+   *   Since PHP 5.3 supports anonymous functions -- REAL Lambdas -- this
+   *   method is not necessary and should be avoided.
+   * @attention
+   *   The string is compiled and executed as PHP code. Never build it from
+   *   untrusted input.
+   * @param string $lambda
+   *   The lambda function. This will be passed ($index, &$item).
+   * @return \QueryPath\DOMQuery
+   *   The DOMQuery object.
+   * @see each()
+   * @see filterLambda()
+   * @see filterCallback()
+   * @see map()
+   */
+  public function eachLambda($lambda) {
+    // Nothing to visit, nothing to compile.
+    if ($this->matches->count() == 0) {
+      return $this;
+    }
+
+    // Compile the lambda ONCE. It used to be rebuilt for every item, and
+    // functions made by create_function() are never released.
+    // create_function() itself was deprecated in PHP 7.2 and removed in
+    // PHP 8.0; where it is gone, an eval()'d closure takes its place. The
+    // newline before the closing brace keeps a trailing // comment in
+    // $lambda from swallowing it.
+    if (function_exists('create_function')) {
+      $fn = create_function('$index, &$item', $lambda);
+    }
+    else {
+      $fn = eval('return static function ($index, &$item) {' . $lambda . "\n};");
+    }
+
+    $index = 0;
+    foreach ($this->matches as $item) {
+      if ($fn($index, $item) === FALSE) return $this;
+      ++$index;
+    }
+    return $this;
+  }
+  /**
+   * Insert the given markup as the last child.
+   *
+   * The markup will be inserted into each match in the set.
+   *
+   * The same element cannot be inserted multiple times into a document. DOM
+   * documents do not allow a single object to be inserted multiple times
+   * into the DOM. To insert the same XML repeatedly, we must first clone
+   * the object. This has one practical implication: Once you have inserted
+   * an element into the object, you cannot further manipulate the original
+   * element and expect the changes to be replicated in the appended object.
+   * (They are not the same -- there is no shared reference.) Instead, you
+   * will need to retrieve the appended object and operate on that.
+   *
+   * @param mixed $data
+   *  This can be either a string (the usual case), or a DOM Element.
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery object.
+   * @see appendTo()
+   * @see prepend()
+   * @throws QueryPath::Exception
+   *  Thrown if $data is an unsupported object type.
+   */
+  public function append($data) {
+    $data = $this->prepareInsert($data);
+    if (!isset($data)) {
+      return $this;
+    }
+
+    // No root element and no matches: then we assume we are writing to the
+    // doc root, and the new root becomes the current match.
+    if (empty($this->document->documentElement) && $this->matches->count() == 0) {
+      $this->document->appendChild($data);
+      $rootOnly = new \SplObjectStorage();
+      $rootOnly->attach($this->document->documentElement);
+      $this->setMatches($rootOnly);
+      return $this;
+    }
+
+    // You can only append an item once. So in cases where we
+    // need to append multiple times, we have to clone the node.
+    // DOMDocumentFragments are even more troublesome, as they don't
+    // always clone correctly. So we clone their children one by one;
+    // for anything else a standard clone will do.
+    $isFragment = $data instanceof \DOMDocumentFragment;
+    foreach ($this->matches as $target) {
+      if (!$isFragment) {
+        $target->appendChild($data->cloneNode(TRUE));
+        continue;
+      }
+      foreach ($data->childNodes as $child) {
+        $target->appendChild($child->cloneNode(TRUE));
+      }
+    }
+    return $this;
+  }
+  /**
+   * Append the current elements to the destination passed into the function.
+   *
+   * This cycles through all of the current matches and appends them to
+   * the context given in $destination. If a selector is provided then the
+   * $destination is queried (using that selector) prior to the data being
+   * appended. The data is then appended to the found items.
+   *
+   * @param DOMQuery $dest
+   *  A DOMQuery object that will be appended to.
+   * @return \QueryPath\DOMQuery
+   *  The original DOMQuery, unaltered. Only the destination DOMQuery will
+   *  be modified.
+ * @see append() + * @see prependTo() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function appendTo(DOMQuery $dest) { + foreach ($this->matches as $m) $dest->append($m); + return $this; + } + /** + * Insert the given markup as the first child. + * + * The markup will be inserted into each match in the set. + * + * @param mixed $data + * This can be either a string (the usual case), or a DOM Element. + * @return \QueryPath\DOMQuery + * @see append() + * @see before() + * @see after() + * @see prependTo() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function prepend($data) { + $data = $this->prepareInsert($data); + if (isset($data)) { + foreach ($this->matches as $m) { + $ins = $data->cloneNode(TRUE); + if ($m->hasChildNodes()) + $m->insertBefore($ins, $m->childNodes->item(0)); + else + $m->appendChild($ins); + } + } + return $this; + } + /** + * Take all nodes in the current object and prepend them to the children nodes of + * each matched node in the passed-in DOMQuery object. + * + * This will iterate through each item in the current DOMQuery object and + * add each item to the beginning of the children of each element in the + * passed-in DOMQuery object. + * + * @see insertBefore() + * @see insertAfter() + * @see prepend() + * @see appendTo() + * @param DOMQuery $dest + * The destination DOMQuery object. + * @return \QueryPath\DOMQuery + * The original DOMQuery, unmodified. NOT the destination DOMQuery. + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function prependTo(DOMQuery $dest) { + foreach ($this->matches as $m) $dest->prepend($m); + return $this; + } + + /** + * Insert the given data before each element in the current set of matches. + * + * This will take the give data (XML or HTML) and put it before each of the items that + * the DOMQuery object currently contains. Contrast this with after(). 
+ * + * @param mixed $data + * The data to be inserted. This can be XML in a string, a DomFragment, a DOMElement, + * or the other usual suspects. (See {@link qp()}). + * @return \QueryPath\DOMQuery + * Returns the DOMQuery with the new modifications. The list of elements currently + * selected will remain the same. + * @see insertBefore() + * @see after() + * @see append() + * @see prepend() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function before($data) { + $data = $this->prepareInsert($data); + foreach ($this->matches as $m) { + $ins = $data->cloneNode(TRUE); + $m->parentNode->insertBefore($ins, $m); + } + + return $this; + } + /** + * Insert the current elements into the destination document. + * The items are inserted before each element in the given DOMQuery document. + * That is, they will be siblings with the current elements. + * + * @param DOMQuery $dest + * Destination DOMQuery document. + * @return \QueryPath\DOMQuery + * The current DOMQuery object, unaltered. Only the destination DOMQuery + * object is altered. + * @see before() + * @see insertAfter() + * @see appendTo() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function insertBefore(DOMQuery $dest) { + foreach ($this->matches as $m) $dest->before($m); + return $this; + } + /** + * Insert the contents of the current DOMQuery after the nodes in the + * destination DOMQuery object. + * + * @param DOMQuery $dest + * Destination object where the current elements will be deposited. + * @return \QueryPath\DOMQuery + * The present DOMQuery, unaltered. Only the destination object is altered. + * @see after() + * @see insertBefore() + * @see append() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. 
+ */ + public function insertAfter(DOMQuery $dest) { + foreach ($this->matches as $m) $dest->after($m); + return $this; + } + /** + * Insert the given data after each element in the current DOMQuery object. + * + * This inserts the element as a peer to the currently matched elements. + * Contrast this with {@link append()}, which inserts the data as children + * of matched elements. + * + * @param mixed $data + * The data to be appended. + * @return \QueryPath\DOMQuery + * The DOMQuery object (with the items inserted). + * @see before() + * @see append() + * @throws QueryPath::Exception + * Thrown if $data is an unsupported object type. + */ + public function after($data) { + if (empty($data)) { + return $this; + } + $data = $this->prepareInsert($data); + foreach ($this->matches as $m) { + $ins = $data->cloneNode(TRUE); + if (isset($m->nextSibling)) + $m->parentNode->insertBefore($ins, $m->nextSibling); + else + $m->parentNode->appendChild($ins); + } + return $this; + } + /** + * Replace the existing element(s) in the list with a new one. + * + * @param mixed $new + * A DOMElement or XML in a string. This will replace all elements + * currently wrapped in the DOMQuery object. + * @return \QueryPath\DOMQuery + * The DOMQuery object wrapping the items that were removed. + * This remains consistent with the jQuery API. + * @see append() + * @see prepend() + * @see before() + * @see after() + * @see remove() + * @see replaceAll() + */ + public function replaceWith($new) { + $data = $this->prepareInsert($new); + $found = new \SplObjectStorage(); + foreach ($this->matches as $m) { + $parent = $m->parentNode; + $parent->insertBefore($data->cloneNode(TRUE), $m); + $found->attach($parent->removeChild($m)); + } + return $this->inst($found, NULL, $this->options); + } + /** + * Remove the parent element from the selected node or nodes. + * + * This takes the given list of nodes and "unwraps" them, moving them out of their parent + * node, and then deleting the parent node. 
+ * + * For example, consider this: + * + * @code + * + * @endcode + * + * Now we can run this code: + * @code + * qp($xml, 'content')->unwrap(); + * @endcode + * + * This will result in: + * + * @code + * + * @endcode + * This is the opposite of wrap(). + * + * The root element cannot be unwrapped. It has no parents. + * If you attempt to use unwrap on a root element, this will throw a + * QueryPath::Exception. (You can, however, "Unwrap" a child that is + * a direct descendant of the root element. This will remove the root + * element, and replace the child as the root element. Be careful, though. + * You cannot set more than one child as a root element.) + * + * @return \QueryPath\DOMQuery + * The DOMQuery object, with the same element(s) selected. + * @throws QueryPath::Exception + * An exception is thrown if one attempts to unwrap a root element. + * @see wrap() + * @since 2.1 + * @author mbutcher + */ + public function unwrap() { + + // We do this in two loops in order to + // capture the case where two matches are + // under the same parent. Othwerwise we might + // remove a match before we can move it. + $parents = new \SplObjectStorage(); + foreach ($this->matches as $m) { + + // Cannot unwrap the root element. + if ($m->isSameNode($m->ownerDocument->documentElement)) { + throw new \QueryPath\Exception('Cannot unwrap the root element.'); + } + + // Move children to peer of parent. + $parent = $m->parentNode; + $old = $parent->removeChild($m); + $parent->parentNode->insertBefore($old, $parent); + $parents->attach($parent); + } + + // Now that all the children are moved, we + // remove all of the parents. + foreach ($parents as $ele) { + $ele->parentNode->removeChild($ele); + } + + return $this; + } + /** + * Wrap each element inside of the given markup. + * + * Markup is usually a string, but it can also be a DOMNode, a document + * fragment, a SimpleXMLElement, or another DOMNode object (in which case + * the first item in the list will be used.) 
+ * + * @param mixed $markup + * Markup that will wrap each element in the current list. + * @return \QueryPath\DOMQuery + * The DOMQuery object with the wrapping changes made. + * @see wrapAll() + * @see wrapInner() + */ + public function wrap($markup) { + $data = $this->prepareInsert($markup); + + // If the markup passed in is empty, we don't do any wrapping. + if (empty($data)) { + return $this; + } + + foreach ($this->matches as $m) { + if ($data instanceof \DOMDocumentFragment) { + $copy = $data->firstChild->cloneNode(true); + } else { + $copy = $data->cloneNode(true); + } + + // XXX: Should be able to avoid doing this over and over. + if ($copy->hasChildNodes()) { + $deepest = $this->deepestNode($copy); + // FIXME: Does this need a different data structure? + $bottom = $deepest[0]; + } + else + $bottom = $copy; + + $parent = $m->parentNode; + $parent->insertBefore($copy, $m); + $m = $parent->removeChild($m); + $bottom->appendChild($m); + //$parent->appendChild($copy); + } + return $this; + } + /** + * Wrap all elements inside of the given markup. + * + * So all elements will be grouped together under this single marked up + * item. This works by first determining the parent element of the first item + * in the list. It then moves all of the matching elements under the wrapper + * and inserts the wrapper where that first element was found. (This is in + * accordance with the way jQuery works.) + * + * Markup is usually XML in a string, but it can also be a DOMNode, a document + * fragment, a SimpleXMLElement, or another DOMNode object (in which case + * the first item in the list will be used.) + * + * @param string $markup + * Markup that will wrap all elements in the current list. + * @return \QueryPath\DOMQuery + * The DOMQuery object with the wrapping changes made. 
+ * @see wrap() + * @see wrapInner() + */ + public function wrapAll($markup) { + if ($this->matches->count() == 0) return; + + $data = $this->prepareInsert($markup); + + if (empty($data)) { + return $this; + } + + if ($data instanceof \DOMDocumentFragment) { + $data = $data->firstChild->cloneNode(true); + } else { + $data = $data->cloneNode(true); + } + + if ($data->hasChildNodes()) { + $deepest = $this->deepestNode($data); + // FIXME: Does this need fixing? + $bottom = $deepest[0]; + } + else + $bottom = $data; + + $first = $this->getFirstMatch(); + $parent = $first->parentNode; + $parent->insertBefore($data, $first); + foreach ($this->matches as $m) { + $bottom->appendChild($m->parentNode->removeChild($m)); + } + return $this; + } + /** + * Wrap the child elements of each item in the list with the given markup. + * + * Markup is usually a string, but it can also be a DOMNode, a document + * fragment, a SimpleXMLElement, or another DOMNode object (in which case + * the first item in the list will be used.) + * + * @param string $markup + * Markup that will wrap children of each element in the current list. + * @return \QueryPath\DOMQuery + * The DOMQuery object with the wrapping changes made. + * @see wrap() + * @see wrapAll() + */ + public function wrapInner($markup) { + $data = $this->prepareInsert($markup); + + // No data? Short circuit. + if (empty($data)) return $this; + + foreach ($this->matches as $m) { + if ($data instanceof \DOMDocumentFragment) { + $wrapper = $data->firstChild->cloneNode(true); + } else { + $wrapper = $data->cloneNode(true); + } + + if ($wrapper->hasChildNodes()) { + $deepest = $this->deepestNode($wrapper); + // FIXME: ??? 
+ $bottom = $deepest[0]; + } + else + $bottom = $wrapper; + + if ($m->hasChildNodes()) { + while($m->firstChild) { + $kid = $m->removeChild($m->firstChild); + $bottom->appendChild($kid); + } + } + + $m->appendChild($wrapper); + } + return $this; + } + /** + * Reduce the set of matches to the deepest child node in the tree. + * + * This loops through the matches and looks for the deepest child node of all of + * the matches. "Deepest", here, is relative to the nodes in the list. It is + * calculated as the distance from the starting node to the most distant child + * node. In other words, it is not necessarily the farthest node from the root + * element, but the farthest note from the matched element. + * + * In the case where there are multiple nodes at the same depth, all of the + * nodes at that depth will be included. + * + * @return \QueryPath\DOMQuery + * The DOMQuery wrapping the single deepest node. + */ + public function deepest() { + $deepest = 0; + $winner = new \SplObjectStorage(); + foreach ($this->matches as $m) { + $local_deepest = 0; + $local_ele = $this->deepestNode($m, 0, NULL, $local_deepest); + + // Replace with the new deepest. + if ($local_deepest > $deepest) { + $winner = new \SplObjectStorage(); + foreach ($local_ele as $lele) $winner->attach($lele); + $deepest = $local_deepest; + } + // Augument with other equally deep elements. + elseif ($local_deepest == $deepest) { + foreach ($local_ele as $lele) + $winner->attach($lele); + } + } + return $this->inst($winner, NULL, $this->options); + } + + /** + * A depth-checking function. Typically, it only needs to be + * invoked with the first parameter. The rest are used for recursion. + * @see deepest(); + * @param DOMNode $ele + * The element. + * @param int $depth + * The depth guage + * @param mixed $current + * The current set. + * @param DOMNode $deepest + * A reference to the current deepest node. + * @return array + * Returns an array of DOM nodes. 
+ */ + protected function deepestNode(\DOMNode $ele, $depth = 0, $current = NULL, &$deepest = NULL) { + // FIXME: Should this use SplObjectStorage? + if (!isset($current)) $current = array($ele); + if (!isset($deepest)) $deepest = $depth; + if ($ele->hasChildNodes()) { + foreach ($ele->childNodes as $child) { + if ($child->nodeType === XML_ELEMENT_NODE) { + $current = $this->deepestNode($child, $depth + 1, $current, $deepest); + } + } + } + elseif ($depth > $deepest) { + $current = array($ele); + $deepest = $depth; + } + elseif ($depth === $deepest) { + $current[] = $ele; + } + return $current; + } + + /** + * Prepare an item for insertion into a DOM. + * + * This handles a variety of boilerplate tasks that need doing before an + * indeterminate object can be inserted into a DOM tree. + * - If item is a string, this is converted into a document fragment and returned. + * - If item is a DOMQuery, then all items are retrieved and converted into + * a document fragment and returned. + * - If the item is a DOMNode, it is imported into the current DOM if necessary. + * - If the item is a SimpleXMLElement, it is converted into a DOM node and then + * imported. + * + * @param mixed $item + * Item to prepare for insert. + * @return mixed + * Returns the prepared item. + * @throws QueryPath::Exception + * Thrown if the object passed in is not of a supprted object type. + */ + protected function prepareInsert($item) { + if(empty($item)) { + return; + } + elseif (is_string($item)) { + // If configured to do so, replace all entities. + if ($this->options['replace_entities']) { + $item = \QueryPath\Entities::replaceAllEntities($item); + } + + $frag = $this->document->createDocumentFragment(); + try { + set_error_handler(array('\QueryPath\ParseException', 'initializeFromError'), $this->errTypes); + $frag->appendXML($item); + } + // Simulate a finally block. 
+ catch (Exception $e) { + restore_error_handler(); + throw $e; + } + restore_error_handler(); + return $frag; + } + elseif ($item instanceof DOMQuery) { + if ($item->size() == 0) + return; + + $frag = $this->document->createDocumentFragment(); + foreach ($item->matches as $m) { + $frag->appendXML($item->document->saveXML($m)); + } + return $frag; + } + elseif ($item instanceof \DOMNode) { + if ($item->ownerDocument !== $this->document) { + // Deep clone this and attach it to this document + $item = $this->document->importNode($item, TRUE); + } + return $item; + } + elseif ($item instanceof \SimpleXMLElement) { + $element = dom_import_simplexml($item); + return $this->document->importNode($element, TRUE); + } + // What should we do here? + //var_dump($item); + throw new \QueryPath\Exception("Cannot prepare item of unsupported type: " . gettype($item)); + } + /** + * The tag name of the first element in the list. + * + * This returns the tag name of the first element in the list of matches. If + * the list is empty, an empty string will be used. + * + * @see replaceAll() + * @see replaceWith() + * @return string + * The tag name of the first element in the list. + */ + public function tag() { + return ($this->size() > 0) ? $this->getFirstMatch()->tagName : ''; + } + /** + * Remove any items from the list if they match the selector. + * + * In other words, each item that matches the selector will be remove + * from the DOM document. The returned DOMQuery wraps the list of + * removed elements. + * + * If no selector is specified, this will remove all current matches from + * the document. + * + * @param string $selector + * A CSS Selector. + * @return \QueryPath\DOMQuery + * The Query path wrapping a list of removed items. + * @see replaceAll() + * @see replaceWith() + * @see removeChildren() + */ + public function remove($selector = NULL) { + if(!empty($selector)) { + // Do a non-destructive find. 
+ $query = new QueryPathEventHandler($this->matches); + $query->find($selector); + $matches = $query->getMatches(); + } + else { + $matches = $this->matches; + } + + $found = new \SplObjectStorage(); + foreach ($matches as $item) { + // The item returned is (according to docs) different from + // the one passed in, so we have to re-store it. + $found->attach($item->parentNode->removeChild($item)); + } + + // Return a clone DOMQuery with just the removed items. If + // no items are found, this will return an empty DOMQuery. + return count($found) == 0 ? new static() : new static($found); + } + /** + * This replaces everything that matches the selector with the first value + * in the current list. + * + * This is the reverse of replaceWith. + * + * Unlike jQuery, DOMQuery cannot assume a default document. Consequently, + * you must specify the intended destination document. If it is omitted, the + * present document is assumed to be tthe document. However, that can result + * in undefined behavior if the selector and the replacement are not sufficiently + * distinct. + * + * @param string $selector + * The selector. + * @param DOMDocument $document + * The destination document. + * @return \QueryPath\DOMQuery + * The DOMQuery wrapping the modified document. + * @deprecated Due to the fact that this is not a particularly friendly method, + * and that it can be easily replicated using {@see replaceWith()}, it is to be + * considered deprecated. + * @see remove() + * @see replaceWith() + */ + public function replaceAll($selector, \DOMDocument $document) { + $replacement = $this->size() > 0 ? 
$this->getFirstMatch() : $this->document->createTextNode(''); + + $c = new QueryPathEventHandler($document); + $c->find($selector); + $temp = $c->getMatches(); + foreach ($temp as $item) { + $node = $replacement->cloneNode(); + $node = $document->importNode($node); + $item->parentNode->replaceChild($node, $item); + } + return QueryPath::with($document, NULL, $this->options); + } + /** + * Add more elements to the current set of matches. + * + * This begins the new query at the top of the DOM again. The results found + * when running this selector are then merged into the existing results. In + * this way, you can add additional elements to the existing set. + * + * @param string $selector + * A valid selector. + * @return \QueryPath\DOMQuery + * The DOMQuery object with the newly added elements. + * @see append() + * @see after() + * @see andSelf() + * @see end() + */ + public function add($selector) { + + // This is destructive, so we need to set $last: + $this->last = $this->matches; + + foreach (QueryPath::with($this->document, $selector, $this->options)->get() as $item) { + $this->matches->attach($item); + } + return $this; + } + /** + * Revert to the previous set of matches. + * + * DEPRECATED Do not use. + * + * This will revert back to the last set of matches (before the last + * "destructive" set of operations). This undoes any change made to the set of + * matched objects. Functions like find() and filter() change the + * list of matched objects. The end() function will revert back to the last set of + * matched items. + * + * Note that functions that modify the document, but do not change the list of + * matched objects, are not "destructive". Thus, calling append('something')->end() + * will not undo the append() call. + * + * Only one level of changes is stored. Reverting beyond that will result in + * an empty set of matches. 
Example: + * + * @code + * // The line below returns the same thing as qp(document, 'p'); + * qp(document, 'p')->find('div')->end(); + * // This returns an empty array: + * qp(document, 'p')->end(); + * // This returns an empty array: + * qp(document, 'p')->find('div')->find('span')->end()->end(); + * @endcode + * + * The last one returns an empty array because only one level of changes is stored. + * + * @return \QueryPath\DOMQuery + * A DOMNode object reflecting the list of matches prior to the last destructive + * operation. + * @see andSelf() + * @see add() + * @deprecated This function will be removed. + */ + public function end() { + // Note that this does not use setMatches because it must set the previous + // set of matches to empty array. + $this->matches = $this->last; + $this->last = new \SplObjectStorage(); + return $this; + } + /** + * Combine the current and previous set of matched objects. + * + * Example: + * + * @code + * qp(document, 'p')->find('div')->andSelf(); + * @endcode + * + * The code above will contain a list of all p elements and all div elements that + * are beneath p elements. + * + * @see end(); + * @return \QueryPath\DOMQuery + * A DOMNode object with the results of the last two "destructive" operations. + * @see add() + * @see end() + */ + public function andSelf() { + // This is destructive, so we need to set $last: + $last = $this->matches; + + foreach ($this->last as $item) $this->matches->attach($item); + + $this->last = $last; + return $this; + } + /** + * Remove all child nodes. + * + * This is equivalent to jQuery's empty() function. (However, empty() is a + * PHP built-in, and cannot be used as a method name.) + * + * @return \QueryPath\DOMQuery + * The DOMQuery object with the child nodes removed. 
+ * @see replaceWith() + * @see replaceAll() + * @see remove() + */ + public function removeChildren() { + foreach ($this->matches as $m) { + while($kid = $m->firstChild) { + $m->removeChild($kid); + } + } + return $this; + } + /** + * Get the children of the elements in the DOMQuery object. + * + * If a selector is provided, the list of children will be filtered through + * the selector. + * + * @param string $selector + * A valid selector. + * @return \QueryPath\DOMQuery + * A DOMQuery wrapping all of the children. + * @see removeChildren() + * @see parent() + * @see parents() + * @see next() + * @see prev() + */ + public function children($selector = NULL) { + $found = new \SplObjectStorage(); + $filter = strlen($selector) > 0; + + if ($filter) { + $tmp = new \SplObjectStorage(); + } + foreach ($this->matches as $m) { + foreach($m->childNodes as $c) { + if ($c->nodeType == XML_ELEMENT_NODE) { + // This is basically an optimized filter() just for children(). + if ($filter) { + $tmp->attach($c); + $query = new \QueryPath\CSS\DOMTraverser($tmp, TRUE, $c); + $query->find($selector); + if (count($query->matches()) > 0) { + $found->attach($c); + } + $tmp->detach($c); + + } + // No filter. Just attach it. + else { + $found->attach($c); + } + } + } + } + $new = $this->inst($found, NULL, $this->options); + return $new; + } + /** + * Get all child nodes (not just elements) of all items in the matched set. + * + * It gets only the immediate children, not all nodes in the subtree. + * + * This does not process iframes. Xinclude processing is dependent on the + * DOM implementation and configuration. + * + * @return \QueryPath\DOMQuery + * A DOMNode object wrapping all child nodes for all elements in the + * DOMNode object. 
+ * @see find() + * @see text() + * @see html() + * @see innerHTML() + * @see xml() + * @see innerXML() + */ + public function contents() { + $found = new \SplObjectStorage(); + foreach ($this->matches as $m) { + if (empty($m->childNodes)) continue; // Issue #51 + foreach ($m->childNodes as $c) { + $found->attach($c); + } + } + return $this->inst($found, NULL, $this->options); + } + /** + * Get a list of siblings for elements currently wrapped by this object. + * + * This will compile a list of every sibling of every element in the + * current list of elements. + * + * Note that if two siblings are present in the DOMQuery object to begin with, + * then both will be returned in the matched set, since they are siblings of each + * other. In other words,if the matches contain a and b, and a and b are siblings of + * each other, than running siblings will return a set that contains + * both a and b. + * + * @param string $selector + * If the optional selector is provided, siblings will be filtered through + * this expression. + * @return \QueryPath\DOMQuery + * The DOMQuery containing the matched siblings. + * @see contents() + * @see children() + * @see parent() + * @see parents() + */ + public function siblings($selector = NULL) { + $found = new \SplObjectStorage(); + foreach ($this->matches as $m) { + $parent = $m->parentNode; + foreach ($parent->childNodes as $n) { + if ($n->nodeType == XML_ELEMENT_NODE && $n !== $m) { + $found->attach($n); + } + } + } + if (empty($selector)) { + return $this->inst($found, NULL, $this->options); + } + else { + return $this->inst($found, NULL, $this->options)->filter($selector); + } + } + /** + * Find the closest element matching the selector. + * + * This finds the closest match in the ancestry chain. It first checks the + * present element. If the present element does not match, this traverses up + * the ancestry chain (e.g. checks each parent) looking for an item that matches. + * + * It is provided for jQuery 1.3 compatibility. 
+ * @param string $selector + * A CSS Selector to match. + * @return \QueryPath\DOMQuery + * The set of matches. + * @since 2.0 + */ + public function closest($selector) { + $found = new \SplObjectStorage(); + foreach ($this->matches as $m) { + + if (QueryPath::with($m, NULL, $this->options)->is($selector) > 0) { + $found->attach($m); + } + else { + while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) { + $m = $m->parentNode; + // Is there any case where parent node is not an element? + if ($m->nodeType === XML_ELEMENT_NODE && QueryPath::with($m, NULL, $this->options)->is($selector) > 0) { + $found->attach($m); + break; + } + } + } + + } + // XXX: Should this be an in-place modification? + return $this->inst($found, NULL, $this->options); + //$this->setMatches($found); + //return $this; + } + /** + * Get the immediate parent of each element in the DOMQuery. + * + * If a selector is passed, this will return the nearest matching parent for + * each element in the DOMQuery. + * + * @param string $selector + * A valid CSS3 selector. + * @return \QueryPath\DOMQuery + * A DOMNode object wrapping the matching parents. + * @see children() + * @see siblings() + * @see parents() + */ + public function parent($selector = NULL) { + $found = new \SplObjectStorage(); + foreach ($this->matches as $m) { + while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) { + $m = $m->parentNode; + // Is there any case where parent node is not an element? + if ($m->nodeType === XML_ELEMENT_NODE) { + if (!empty($selector)) { + if (QueryPath::with($m, NULL, $this->options)->is($selector) > 0) { + $found->attach($m); + break; + } + } + else { + $found->attach($m); + break; + } + } + } + } + return $this->inst($found, NULL, $this->options); + } + /** + * Get all ancestors of each element in the DOMQuery. + * + * If a selector is present, only matching ancestors will be retrieved. + * + * @see parent() + * @param string $selector + * A valid CSS 3 Selector. 
+ * @return \QueryPath\DOMQuery + * A DOMNode object containing the matching ancestors. + * @see siblings() + * @see children() + */ + public function parents($selector = NULL) { + $found = new \SplObjectStorage(); + foreach ($this->matches as $m) { + while ($m->parentNode->nodeType !== XML_DOCUMENT_NODE) { + $m = $m->parentNode; + // Is there any case where parent node is not an element? + if ($m->nodeType === XML_ELEMENT_NODE) { + if (!empty($selector)) { + if (QueryPath::with($m, NULL, $this->options)->is($selector) > 0) + $found->attach($m); + } + else + $found->attach($m); + } + } + } + return $this->inst($found, NULL, $this->options); + } + /** + * Set or get the markup for an element. + * + * If $markup is set, then the giving markup will be injected into each + * item in the set. All other children of that node will be deleted, and this + * new code will be the only child or children. The markup MUST BE WELL FORMED. + * + * If no markup is given, this will return a string representing the child + * markup of the first node. + * + * Important: This differs from jQuery's html() function. This function + * returns the current node and all of its children. jQuery returns only + * the children. This means you do not need to do things like this: + * @code$qp->parent()->html()@endcode. + * + * By default, this is HTML 4.01, not XHTML. Use {@link xml()} for XHTML. + * + * @param string $markup + * The text to insert. + * @return mixed + * A string if no markup was passed, or a DOMQuery if markup was passed. 
+ * @see xml() + * @see text() + * @see contents() + */ + public function html($markup = NULL) { + if (isset($markup)) { + + if ($this->options['replace_entities']) { + $markup = \QueryPath\Entities::replaceAllEntities($markup); + } + + // Parse the HTML and insert it into the DOM + //$doc = DOMDocument::loadHTML($markup); + $doc = $this->document->createDocumentFragment(); + $doc->appendXML($markup); + $this->removeChildren(); + $this->append($doc); + return $this; + } + $length = $this->size(); + if ($length == 0) { + return NULL; + } + // Only return the first item -- that's what JQ does. + $first = $this->getFirstMatch(); + + // Catch cases where first item is not a legit DOM object. + if (!($first instanceof \DOMNode)) { + return NULL; + } + + // Added by eabrand. + if(!$first->ownerDocument->documentElement) { + return NULL; + } + + if ($first instanceof \DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) { + return $this->document->saveHTML(); + } + // saveHTML cannot take a node and serialize it. + return $this->document->saveXML($first); + } + + /** + * Write the QueryPath document to HTML5. + * + * See html() + */ + function html5($markup = NULL) { + $html5 = new HTML5($this->options); + + // append HTML to existing + if (isset($markup)) { + + // Parse the HTML and insert it into the DOM + $doc = $html5->loadHTMLFragment($markup); + $this->removeChildren(); + $this->append($doc); + return $this; + } + + $length = $this->size(); + if ($length == 0) { + return NULL; + } + // Only return the first item -- that's what JQ does. + $first = $this->getFirstMatch(); + + // Catch cases where first item is not a legit DOM object. + if (!($first instanceof \DOMNode)) { + return NULL; + } + + // Added by eabrand. 
+ if(!$first->ownerDocument->documentElement) { + return NULL; + } + + if ($first instanceof \DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) { + return $html5->saveHTML($this->document); //$this->document->saveHTML(); + } + return $html5->saveHTML($first); + } + + /** + * Fetch the HTML contents INSIDE of the first DOMQuery item. + * + * This behaves the way jQuery's @codehtml()@endcode function behaves. + * + * This gets all children of the first match in DOMQuery. + * + * Consider this fragment: + * @code + *
+   * <div>
+   * test <p>foo</p> test
+   * </div>
+   * @endcode
+   *
+   * We can retrieve just the contents of this code by doing something like
+   * this:
+   * @code
+   * qp($xml, 'div')->innerHTML();
+   * @endcode
+   *
+   * This would return the following:
+   * @codetest <p>foo</p> test@endcode
+   *
+   * @return string
+   *  Returns a string representation of the child nodes of the first
+   *  matched element.
+   * @see html()
+   * @see innerXML()
+   * @see innerXHTML()
+   * @since 2.0
+   */
+  public function innerHTML() {
+    return $this->innerXML();
+  }
+
+  /**
+   * Serialize the children of the first match as XHTML.
+   *
+   * Each child is written with LIBXML_NOEMPTYTAG, so empty elements keep an
+   * explicit closing tag. For an example, see {@link innerHTML()}.
+   *
+   * @see innerHTML()
+   * @see innerXML()
+   * @return string
+   *  The concatenated markup of the child nodes, an empty string when the
+   *  first match has no children, or NULL when there is no usable match.
+   * @since 2.0
+   */
+  public function innerXHTML() {
+    if ($this->size() == 0) {
+      return NULL;
+    }
+    // Only the first item is used -- that's what JQ does.
+    $node = $this->getFirstMatch();
+
+    // Bail out when the first item is not a real DOM node.
+    if (!($node instanceof \DOMNode)) {
+      return NULL;
+    }
+    if (!$node->hasChildNodes()) {
+      return '';
+    }
+
+    $parts = array();
+    foreach ($node->childNodes as $kid) {
+      $parts[] = $this->document->saveXML($kid, LIBXML_NOEMPTYTAG);
+    }
+
+    return implode('', $parts);
+  }
+
+  /**
+   * Serialize the children of the first match as XML.
+   *
+   * Same as {@link innerXHTML()} but without LIBXML_NOEMPTYTAG. For an
+   * example, see {@link innerHTML()}.
+   *
+   * @see innerHTML()
+   * @see innerXHTML()
+   * @return string
+   *  The concatenated XML of the child nodes, an empty string when the
+   *  first match has no children, or NULL when there is no usable match.
+   * @since 2.0
+   */
+  public function innerXML() {
+    if ($this->size() == 0) {
+      return NULL;
+    }
+    // Only the first item is used -- that's what JQ does.
+    $node = $this->getFirstMatch();
+
+    // Bail out when the first item is not a real DOM node.
+    if (!($node instanceof \DOMNode)) {
+      return NULL;
+    }
+    if (!$node->hasChildNodes()) {
+      return '';
+    }
+
+    $parts = array();
+    foreach ($node->childNodes as $kid) {
+      $parts[] = $this->document->saveXML($kid);
+    }
+
+    return implode('', $parts);
+  }
+
+  /**
+   * Get child elements as an HTML5 string.
+   *
+   * TODO: This is a very simple alteration of innerXML. Do we need better
+   * support?
+   *
+   * @return string
+   *  The concatenated HTML5 of the child nodes, an empty string when the
+   *  first match has no children, or NULL when there is no usable match.
+   */
+  public function innerHTML5() {
+    if ($this->size() == 0) {
+      return NULL;
+    }
+    // Only the first item is used -- that's what JQ does.
+    $node = $this->getFirstMatch();
+
+    // Bail out when the first item is not a real DOM node.
+    if (!($node instanceof \DOMNode)) {
+      return NULL;
+    }
+    if (!$node->hasChildNodes()) {
+      return '';
+    }
+
+    $writer = new HTML5($this->options);
+    $parts = array();
+    foreach ($node->childNodes as $kid) {
+      $parts[] = $writer->saveHTML($kid);
+    }
+
+    return implode('', $parts);
+  }
+
+  /**
+   * Retrieve the text of each match and concatenate them with the given separator.
+   *
+   * @param string $sep
+   *  The string used to separate text items. The default is a comma followed by a
+   *  space.
+   * @param boolean $filterEmpties
+   *  If this is true, matches whose text is empty or whitespace-only are
+   *  skipped. Text that is kept is added untrimmed.
+   * @return string
+   *  The text contents, concatenated together with the given separator between
+   *  every pair of items.
+   * @see implode()
+   * @see text()
+   * @since 2.0
+   */
+  public function textImplode($sep = ', ', $filterEmpties = TRUE) {
+    $pieces = array();
+    foreach ($this->matches as $match) {
+      $content = $match->textContent;
+      // The trimmed copy is used only for the emptiness test.
+      if ($filterEmpties && strlen(trim($content)) == 0) {
+        continue;
+      }
+      $pieces[] = $content;
+    }
+    return implode($sep, $pieces);
+  }
+  /**
+   * Get the text contents from just the descendant text nodes.
+   *
+   * This is a specialized variant of textImplode() that implodes the text
+   * nodes found below the current matches.
+   *
+   * @param string $separator
+   *  The separator that will be inserted between found text content.
+   * @return string
+   *  The concatenated values of all children.
+   */
+  function childrenText($separator = ' ') {
+    // The query runs on a branch so the current match set is left alone.
+    $textNodes = $this->branch()->xpath('descendant::text()');
+    return $textNodes->textImplode($separator);
+  }
+  /**
+   * Get or set the text contents of a node.
+   * @param string $text
+   *  If this is not NULL, this value will be set as the text of the node. It
+   *  will replace any existing content.
+   * @return mixed
+   *  A DOMQuery if $text is set, or the text content if no text
+   *  is passed in as a param.
+   * @see html()
+   * @see xml()
+   * @see contents()
+   */
+  public function text($text = NULL) {
+    if (!isset($text)) {
+      // Getter: the text of all matches, joined into one string.
+      $collected = '';
+      foreach ($this->matches as $match) {
+        $collected .= $match->textContent;
+      }
+      return $collected;
+    }
+
+    // Setter: drop existing children, then give each match its own text node.
+    $this->removeChildren();
+    foreach ($this->matches as $match) {
+      $match->appendChild($this->document->createTextNode($text));
+    }
+    return $this;
+  }
+  /**
+   * Get or set the text before each selected item.
+   *
+   * If $text is passed in, the text is inserted before each currently selected item.
+   *
+   * If no text is given, this will return the concatenated text before each selected element.
+   *
+   * @code
+   * Foo
Bar';
+   *
+   * // This will return 'Foo'
+   * qp($xml, 'a')->textBefore();
+   *
+   * // This will insert 'Baz' right before <b/>.
+   * qp($xml, 'b')->textBefore('Baz');
+   * ?>
+   * @endcode
+   *
+   * @param string $text
+   *  If this is set, it will be inserted before each node in the current set of
+   *  selected items.
+   * @return mixed
+   *  Returns the DOMQuery object if $text was set, and returns a string (possibly empty)
+   *  if no param is passed.
+   */
+  public function textBefore($text = NULL) {
+    if (isset($text)) {
+      $textNode = $this->document->createTextNode($text);
+      return $this->before($textNode);
+    }
+    $buffer = '';
+    foreach ($this->matches as $m) {
+      $p = $m;
+      // Walk backwards over the run of text nodes directly preceding the match.
+      while (isset($p->previousSibling) && $p->previousSibling->nodeType == XML_TEXT_NODE) {
+        $p = $p->previousSibling;
+        $buffer .= $p->textContent;
+      }
+    }
+    return $buffer;
+  }
+
+  /**
+   * Get or set the text after each selected item.
+   *
+   * Mirror image of textBefore(): with $text, a text node is inserted after
+   * each selected item; without it, the text nodes that directly follow each
+   * selected item are concatenated and returned.
+   *
+   * @param string $text
+   *  If this is set, it will be inserted after each node in the current set of
+   *  selected items.
+   * @return mixed
+   *  Returns the DOMQuery object if $text was set, and returns a string (possibly empty)
+   *  if no param is passed.
+   * @see textBefore()
+   */
+  public function textAfter($text = NULL) {
+    if (isset($text)) {
+      $textNode = $this->document->createTextNode($text);
+      return $this->after($textNode);
+    }
+    $buffer = '';
+    foreach ($this->matches as $m) {
+      $n = $m;
+      // Walk forwards over the run of text nodes directly following the match.
+      while (isset($n->nextSibling) && $n->nextSibling->nodeType == XML_TEXT_NODE) {
+        $n = $n->nextSibling;
+        $buffer .= $n->textContent;
+      }
+    }
+    return $buffer;
+  }
+
+  /**
+   * Set or get the value of an element's 'value' attribute.
+   *
+   * The 'value' attribute is common in HTML form elements. This is a
+   * convenience function for accessing the values. Since this is not a common
+   * task on the server side, this method may be removed in future releases. (It
+   * is currently provided for jQuery compatibility.)
+   *
+   * If a value is provided in the params, then the value will be set for all
+   * matches. If no params are given, then the value of the first matched element
+   * will be returned. This may be NULL.
+   *
+   * @deprecated Just use attr(). There's no reason to use this on the server.
+   * @see attr()
+   * @param string $value
+   * @return mixed
+   *  Returns a DOMQuery if a string was passed in, and a string if no string
+   *  was passed in. In the latter case, an error will produce NULL.
+   */
+  public function val($value = NULL) {
+    if (isset($value)) {
+      $this->attr('value', $value);
+      return $this;
+    }
+    return $this->attr('value');
+  }
+  /**
+   * Set or get XHTML markup for an element or elements.
+   *
+   * This differs from {@link html()} in that it processes (and produces)
+   * strictly XML 1.0 compliant markup.
+   *
+   * Like {@link xml()} and {@link html()}, this functions as both a
+   * setter and a getter.
+   *
+   * This is a convenience function for fetching HTML in XML format.
+   * It does no processing of the markup (such as schema validation).
+   * @param string $markup
+   *  A string containing XML data. Passing TRUE instead selects getter mode
+   *  with the XML declaration stripped from the output.
+   * @return mixed
+   *  If markup is passed in, a DOMQuery is returned. If no markup is passed
+   *  in, XML representing the first matched element is returned.
+   * @see html()
+   * @see innerXHTML()
+   */
+  public function xhtml($markup = NULL) {
+
+    // XXX: This is a minor reworking of the original xml() method.
+    // This should be refactored, probably.
+    // See http://github.com/technosophos/querypath/issues#issue/10
+
+    $omit_xml_decl = $this->options['omit_xml_declaration'];
+    if ($markup === TRUE) {
+      // Basically, we handle the special case where we don't
+      // want the XML declaration to be displayed.
+      $omit_xml_decl = TRUE;
+    }
+    elseif (isset($markup)) {
+      return $this->xml($markup);
+    }
+
+    $length = $this->size();
+    if ($length == 0) {
+      return NULL;
+    }
+
+    // Only return the first item -- that's what JQ does.
+    $first = $this->getFirstMatch();
+    // Catch cases where first item is not a legit DOM object.
+    if (!($first instanceof \DOMNode)) {
+      return NULL;
+    }
+
+    if ($first instanceof \DOMDocument || $first->isSameNode($first->ownerDocument->documentElement)) {
+
+      // Has the unfortunate side-effect of stripping doctype.
+      //$text = ($omit_xml_decl ? $this->document->saveXML($first->ownerDocument->documentElement, LIBXML_NOEMPTYTAG) : $this->document->saveXML(NULL, LIBXML_NOEMPTYTAG));
+      $text = $this->document->saveXML(NULL, LIBXML_NOEMPTYTAG);
+    }
+    else {
+      $text = $this->document->saveXML($first, LIBXML_NOEMPTYTAG);
+    }
+
+    // Issue #47: Using the old trick for removing the XML tag also removed the
+    // doctype. So we remove it with a regex:
+    if ($omit_xml_decl) {
+      $text = preg_replace('/<\?xml\s[^>]*\?>/', '', $text);
+    }
+
+    // This is slightly lenient: It allows for cases where code incorrectly places content
+    // inside of these supposedly unary elements.
+    $unary = '/<(area|base|basefont|br|col|frame|hr|img|input|isindex|link|meta|param)(?(?=\s)([^>\/]+))><\/[^>]*>/i';
+    $text = preg_replace($unary, '<\\1\\2 />', $text);
+
+    // Experimental: Support for enclosing CDATA sections with comments to be both XML compat
+    // and HTML 4/5 compat
+    // The pattern must match the CDATA delimiters themselves; an empty group
+    // would match at every offset and splice the replacement between all
+    // characters of the output.
+    $cdata = '/(<!\[CDATA\[|\]\]>)/i';
+    $replace = $this->options['escape_xhtml_js_css_sections'];
+    $text = preg_replace($cdata, $replace, $text);
+
+    return $text;
+  }
+  /**
+   * Set or get the XML markup for an element or elements.
+   *
+   * Like {@link html()}, this functions in both a setter and a getter mode.
+   *
+   * In setter mode, the string passed in will be parsed and then appended to the
+   * elements wrapped by this DOMNode object. When in setter mode, this parses
+   * the XML using the DOMFragment parser. For that reason, an XML declaration
+   * is not necessary.
+   *
+   * In getter mode, the first element wrapped by this DOMNode object will be
+   * converted to an XML string and returned.
+   *
+   * @param string $markup
+   *  A string containing XML data.
+   * @return mixed
+   *  If markup is passed in, a DOMQuery is returned. If no markup is passed
+   *  in, XML representing the first matched element is returned.
+   * @see xhtml()
+   * @see html()
+   * @see text()
+   * @see contents()
+   * @see innerXML()
+   */
+  public function xml($markup = NULL) {
+    $omit_xml_decl = $this->options['omit_xml_declaration'];
+    if ($markup === TRUE) {
+      // Basically, we handle the special case where we don't
+      // want the XML declaration to be displayed.
+      $omit_xml_decl = TRUE;
+    }
+    elseif (isset($markup)) {
+      if ($this->options['replace_entities']) {
+        $markup = \QueryPath\Entities::replaceAllEntities($markup);
+      }
+      // Setter mode: the parsed fragment replaces the current children.
+      $doc = $this->document->createDocumentFragment();
+      $doc->appendXML($markup);
+      $this->removeChildren();
+      $this->append($doc);
+      return $this;
+    }
+    $length = $this->size();
+    if ($length == 0) {
+      return NULL;
+    }
+    // Only return the first item -- that's what JQ does.
+    $first = $this->getFirstMatch();
+
+    // Catch cases where first item is not a legit DOM object.
+    if (!($first instanceof \DOMNode)) {
+      return NULL;
+    }
+
+    // ownerDocument is NULL when the node is itself a DOMDocument, so resolve
+    // the document element through the right object instead of through NULL.
+    $owner = ($first instanceof \DOMDocument) ? $first : $first->ownerDocument;
+    $root = $owner ? $owner->documentElement : NULL;
+
+    if ($first instanceof \DOMDocument || ($root && $first->isSameNode($root))) {
+
+      // Serializing only the root element is what drops the XML declaration.
+      return ($omit_xml_decl ? $this->document->saveXML($root) : $this->document->saveXML());
+    }
+    return $this->document->saveXML($first);
+  }
+  /**
+   * Send the XML document to the client.
+   *
+   * Write the document to a file path, if given, or
+   * to stdout (usually the client).
+   *
+   * This prints the entire document.
+   *
+   * @param string $path
+   *  The path to the file into which the XML should be written. If
+   *  this is NULL, data will be written to STDOUT, which is usually
+   *  sent to the remote browser.
+   * @param int $options
+   *  (As of QueryPath 2.1) Pass libxml options to the saving mechanism.
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery object, unmodified.
+   * @see xml()
+   * @see innerXML()
+   * @see writeXHTML()
+   * @throws Exception
+   *  In the event that a file cannot be written, an Exception will be thrown.
+   */
+  public function writeXML($path = NULL, $options = NULL) {
+    // libxml takes an integer bitmask; the NULL default means "no options".
+    $options = (int) $options;
+    if ($path == NULL) {
+      print $this->document->saveXML(NULL, $options);
+    }
+    else {
+      try {
+        // Turn PHP warnings raised while saving into exceptions.
+        set_error_handler(array('\QueryPath\IOException', 'initializeFromError'));
+        $this->document->save($path, $options);
+      }
+      // Fully qualified so that any exception, not only those resolved in
+      // the current namespace, restores the previous error handler.
+      catch (\Exception $e) {
+        restore_error_handler();
+        throw $e;
+      }
+      restore_error_handler();
+    }
+    return $this;
+  }
+  /**
+   * Writes HTML to output.
+   *
+   * HTML is formatted as HTML 4.01, without strict XML unary tags. This is for
+   * legacy HTML content. Modern XHTML should be written using {@link toXHTML()}.
+   *
+   * Write the document to stdout (usually the client) or to a file.
+   *
+   * @param string $path
+   *  The path to the file into which the XML should be written. If
+   *  this is NULL, data will be written to STDOUT, which is usually
+   *  sent to the remote browser.
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery object, unmodified.
+   * @see html()
+   * @see innerHTML()
+   * @throws Exception
+   *  In the event that a file cannot be written, an Exception will be thrown.
+   */
+  public function writeHTML($path = NULL) {
+    if ($path == NULL) {
+      print $this->document->saveHTML();
+    }
+    else {
+      try {
+        // Turn PHP warnings raised while saving into exceptions.
+        // NOTE(review): writeXML() installs IOException here while this uses
+        // ParseException -- confirm which type callers are meant to catch.
+        set_error_handler(array('\QueryPath\ParseException', 'initializeFromError'));
+        $this->document->saveHTMLFile($path);
+      }
+      // Fully qualified so that any exception, not only those resolved in
+      // the current namespace, restores the previous error handler.
+      catch (\Exception $e) {
+        restore_error_handler();
+        throw $e;
+      }
+      restore_error_handler();
+    }
+    return $this;
+  }
+
+  /**
+   * Write the document to HTML5.
+   *
+   * This works the same as the other write* functions, but it encodes the output
+   * as HTML5 with UTF-8.
+   *
+   * @param string $path
+   *  The file to write to. If this is NULL, the document is printed instead.
+   * @return \QueryPath\DOMQuery
+   *  The DOMQuery object, unmodified.
+   * @see html5()
+   * @see innerHTML5()
+   * @throws Exception
+   *  In the event that a file cannot be written, an Exception will be thrown.
+   */
+  public function writeHTML5($path = NULL) {
+    $html5 = new HTML5();
+    if ($path == NULL) {
+      // Print the document to stdout.
+      print $html5->saveHTML($this->document);
+      // Return the object, like the other write* methods do.
+      return $this;
+    }
+
+    $html5->save($this->document, $path);
+    return $this;
+  }
+
+  /**
+   * Write an XHTML file to output.
+   *
+   * Typically, you should use this instead of {@link writeHTML()}.
+   *
+   * Currently, this functions identically to {@link toXML()} except that
+   * it always uses closing tags (e.g. always @code@endcode,
+ * never @code, not , not + + + '; + + $xhtml = qp($xml)->xhtml(); + + //throw new Exception($xhtml); + + // Look for a properly formatted BR unary tag: + $regex = '/
/'; + $this->assertRegExp($regex, $xhtml, 'BR should have a closing tag.'); + + // Look for a properly formatted HR tag: + $regex = '/
/'; + $this->assertRegExp($regex, $xhtml, 'BR should have a closing tag.'); + + // Ensure that script tag is not collapsed: + $regex = '/ + + foobar'; + + if (!ob_start()) die ("Could not start OB."); + qp($xml, 'tml')->writeXML(); + $out = ob_get_contents(); + ob_end_clean(); + + // We expect an XML declaration at the top. + $this->assertEquals('writeXML($name); + $this->assertTrue(file_exists($name)); + $this->assertTrue(qp($name) instanceof DOMQuery); + unlink($name); + } + + public function testWriteXHTML() { + $xml = 'foobar'; + + if (!ob_start()) die ("Could not start OB."); + qp($xml, 'tml')->writeXHTML(); + $out = ob_get_contents(); + ob_end_clean(); + + // We expect an XML declaration at the top. + $this->assertEquals(' + + foobar'; + + if (!ob_start()) die ("Could not start OB."); + qp($xml, 'html')->writeXHTML(); + $out = ob_get_contents(); + ob_end_clean(); + + // We expect an XML declaration at the top. + $this->assertEquals('writeXHTML($name); + $this->assertTrue(file_exists($name)); + $this->assertTrue(qp($name) instanceof DOMQuery); + unlink($name); + + // Regression for issue #10 (keep closing tags in XHTML) + $xhtml = 'foo
bar'; + if (!ob_start()) die ("Could not start OB."); + qp($xhtml, 'html')->writeXHTML(); + $out = ob_get_contents(); + ob_end_clean(); + + $pattern = '/<\/script>/'; + $this->assertRegExp($pattern, $out, 'Should be closing script tag.'); + + $pattern = '/<\/br>/'; + $this->assertRegExp($pattern, $out, 'Should be closing br tag.'); + } + + /** + * @expectedException \QueryPath\IOException + */ + public function testFailWriteXML() { + try { + qp()->writeXML('./test/no-writing.xml'); + } + catch (Exception $e) { + //print $e->getMessage(); + throw $e; + } + + } + + /** + * @expectedException \QueryPath\IOException + */ + public function testFailWriteXHTML() { + try { + qp()->writeXHTML('./test/no-writing.xml'); + } + catch (\QueryPath\IOException $e) { + //print $e->getMessage(); + throw $e; + } + + } + + /** + * @expectedException \QueryPath\IOException + */ + public function testFailWriteHTML() { + try { + qp('')->writeXML('./test/no-writing.xml'); + } + catch (\QueryPath\IOException $e) { + // print $e->getMessage(); + throw $e; + } + + } + + public function testWriteHTML() { + $xml = 'foobar'; + + if (!ob_start()) die ("Could not start OB."); + qp($xml, 'tml')->writeHTML(); + $out = ob_get_contents(); + ob_end_clean(); + + // We expect a doctype declaration at the top. + $this->assertEquals('foo + + bar'; + + if (!ob_start()) die ("Could not start OB."); + qp($xml, 'tml')->writeHTML(); + $out = ob_get_contents(); + ob_end_clean(); + + // We expect a doctype declaration at the top. + $this->assertEquals('foo + + bar'; + + if (!ob_start()) die ("Could not start OB."); + qp($xml, 'tml')->writeHTML(); + $out = ob_get_contents(); + ob_end_clean(); + + // We expect a doctype declaration at the top. + $this->assertEquals('writeXML($name); + $this->assertTrue(file_exists($name)); + $this->assertTrue(qp($name) instanceof DOMQuery); + unlink($name); + } + + public function testText() { + $xml = '
Text A
Text B
'; + $this->assertEquals('Text AText B', qp($xml)->text()); + $this->assertEquals('Foo', qp($xml, 'div')->eq(0)->text('Foo')->text()); + $this->assertEquals('BarBar', qp($xml, 'div')->text('Bar')->text()); + } + + public function testTextAfter() { + $xml = '
After
After2
After3'; + $this->assertEquals('AfterAfter2', qp($xml, 'br')->textAfter()); + $this->assertEquals('Blarg', qp($xml, 'foo')->textAfter('Blarg')->top('foo')->textAfter()); + } + + public function testTextBefore() { + $xml = 'Before
Before2
Before3
'; + $this->assertEquals('BeforeBefore2', qp($xml, 'br')->textBefore()); + $this->assertEquals('Blarg', qp($xml, 'foo')->textBefore('Blarg')->top('foo')->textBefore()); + + } + + public function testTextImplode() { + $xml = '
Text A
Text B
'; + $this->assertEquals('Text A, Text B', qp($xml, 'div')->textImplode()); + $this->assertEquals('Text A--Text B', qp($xml, 'div')->textImplode('--')); + + $xml = '
Text A
Text B
'; + $this->assertEquals('Text A , Text B', qp($xml, 'div')->textImplode()); + + $xml = '
Text A
+
+
Text B
'; + $this->assertEquals('Text A , Text B', qp($xml, 'div')->textImplode(', ', TRUE)); + + // Test with empties + $xml = '
Text A
Text B
'; + $this->assertEquals('Text A- -Text B', qp($xml, 'div')->textImplode('-', FALSE)); + } + + public function testChildrenText() { + $xml = ' + NOT ME! +
Text A
+
+
Text B
'; + $this->assertEquals('Text A , Text B', qp($xml, 'div')->childrenText(', ', TRUE), 'Just inner text.'); + } + + public function testNext() { + $file = DATA_FILE; + $this->assertEquals('inner', qp($file, 'unary')->next()->tag()); + $this->assertEquals('foot', qp($file, 'inner')->next()->eq(1)->tag()); + + $this->assertEquals('foot', qp($file, 'unary')->next('foot')->tag()); + + // Regression test for issue eabrand identified: + + $qp = qp(\QueryPath::HTML_STUB, 'body')->append('

Hello

Goodbye

') + ->children('p') + ->after('

new paragraph

'); + + $testarray = array('new paragraph', 'Goodbye', 'new paragraph'); + + //throw new Exception($qp->top()->xml()); + + $qp = $qp->top('p:first-of-type'); + $this->assertEquals('Hello', $qp->text(), "Test First P " . $qp->top()->html()); + $i = 0; + while($qp->next('p')->html() != null) { + $qp = $qp->next('p'); + $this->assertEquals(1, count($qp)); + $this->assertEquals($testarray[$i], $qp->text(), $i . " didn't match " . $qp->top()->xml() ); + $i++; + } + $this->assertEquals(3, $i); +// $this->assertEquals('new paragraph', $qp->next()->text(), "Test Newly Added P"); +// $this->assertEquals('Goodbye', $qp->next()->text(), "Test third P"); +// $this->assertEquals('new paragraph', $qp->next()->text(), "Test Other Newly Added P"); + } + public function testPrev() { + $file = DATA_FILE; + $this->assertEquals('head', qp($file, 'unary')->prev()->tag()); + $this->assertEquals('inner', qp($file, 'inner')->prev()->eq(1)->tag()); + $this->assertEquals('head', qp($file, 'foot')->prev('head')->tag()); + } + public function testNextAll() { + $file = DATA_FILE; + $this->assertEquals(3, qp($file, '#one')->nextAll()->size()); + $this->assertEquals(2, qp($file, 'unary')->nextAll('inner')->size()); + } + public function testPrevAll() { + $file = DATA_FILE; + $this->assertEquals(3, qp($file, '#four')->prevAll()->size()); + $this->assertEquals(2, qp($file, 'foot')->prevAll('inner')->size()); + } + public function testParent() { + $file = DATA_FILE; + $this->assertEquals('root', qp($file, 'unary')->parent()->tag()); + $this->assertEquals('root', qp($file, 'li')->parent('root')->tag()); + $this->assertEquals(2, qp($file, 'li')->parent()->size()); + } + public function testClosest() { + $file = DATA_FILE; + $this->assertEquals('root', qp($file, 'li')->parent('root')->tag()); + + $xml = ' + + + + + + '; + $this->assertEquals(2, qp($xml, 'b')->closest('.foo')->size()); + } + + public function testParents() { + $file = DATA_FILE; + + // Three: two inners and a root. 
+ $this->assertEquals(3, qp($file, 'li')->parents()->size()); + $this->assertEquals('root', qp($file, 'li')->parents('root')->tag()); + } + + public function testCloneAll() { + $file = DATA_FILE; + + // Shallow test + $qp = qp($file, 'unary'); + $one = $qp->get(0); + $two = $qp->cloneAll()->get(0); + $this->assertTrue($one !== $two); + $this->assertEquals('unary', $two->tagName); + + // Deep test: make sure children are also cloned. + $qp = qp($file, 'inner'); + $one = $qp->find('li')->get(0); + $two = $qp->top('inner')->cloneAll(TRUE)->findInPlace('li')->get(0); + $this->assertEquals('li', $two->tagName); + $this->assertTrue($one !== $two); + } + + public function testBranch() { + $qp = qp(\QueryPath::HTML_STUB); + $branch = $qp->branch(); + $branch->top('title')->text('Title'); + $qp->top('title')->text('FOOOOO')->top(); + $qp->find('body')->text('This is the body'); + + $this->assertEquals($qp->top('title')->text(), $branch->top('title')->text(), $branch->top()->html()); + + $qp = qp(\QueryPath::HTML_STUB); + $branch = $qp->branch('title'); + $branch->find('title')->text('Title'); + $qp->find('body')->text('This is the body'); + $this->assertEquals($qp->top()->find('title')->text(), $branch->text()); + } + + public function testXpath() { + $file = DATA_FILE; + + $this->assertEquals('head', qp($file)->xpath("//*[@id='head']")->tag()); + } + + public function test__clone() { + $file = DATA_FILE; + + $qp = qp($file, 'inner:first-of-type'); + $qp2 = clone $qp; + $this->assertFalse($qp === $qp2); + $qp2->findInPlace('li')->attr('foo', 'bar'); + $this->assertEquals('', $qp->find('li')->attr('foo')); + $this->assertEquals('bar', $qp2->attr('foo'), $qp2->top()->xml()); + } + + public function testStub() { + $this->assertEquals(1, qp(\QueryPath::HTML_STUB)->find('title')->size()); + } + + public function testIterator() { + + $qp = qp(\QueryPath::HTML_STUB, 'body')->append('
  • '); + + $this->assertEquals(4, $qp->find('li')->size()); + $i = 0; + foreach ($qp->find('li') as $li) { + ++$i; + $li->text('foo'); + } + $this->assertEquals(4, $i); + $this->assertEquals('foofoofoofoo', $qp->top()->find('li')->text()); + } + + public function testModeratelySizedDocument() { + + $this->assertEquals(1, qp(MEDIUM_FILE)->size()); + + $contents = file_get_contents(MEDIUM_FILE); + $this->assertEquals(1, qp($contents)->size()); + } + + /** + * @deprecated + */ + public function testSize() { + $file = DATA_FILE; + $qp = qp($file, 'li'); + $this->assertEquals(5, $qp->size()); + } + + public function testCount() { + $file = DATA_FILE; + $qp = qp($file, 'li'); + $this->assertEquals(5, $qp->count()); + + // Test that this is exposed to PHP's Countable logic. + $this->assertEquals(5, count(qp($file, 'li'))); + + } + + public function testLength() { + + // Test that the length attribute works exactly the same as size. + $file = DATA_FILE; + $qp = qp($file, 'li'); + $this->assertEquals(5, $qp->length); + + + } + + public function testDocument() { + $file = DATA_FILE; + $doc1 = new \DOMDocument('1.0'); + $doc1->load($file); + $qp = qp($doc1); + + $this->assertEquals($doc1, $qp->document()); + + // Ensure that adding to the DOMDocument is accessible to QP: + $ele = $doc1->createElement('testDocument'); + $doc1->documentElement->appendChild($ele); + + $this->assertEquals(1, $qp->find('testDocument')->size()); + } + + /* + public function test__get() { + // Test that other properties are not interferred with by __get(). + $file = DATA_FILE; + $options = array('QueryPath_class' => 'QueryPathExtended'); + $foo = qp($file,'li', $options)->foo; + + $this->assertEquals('bar', $foo); + } + */ + + /** + * @ expectedException \QueryPath\Exception + */ + /* + public function testFailed__get() { + // This should generate an error because 'last' is protected. 
+ qp(DATA_FILE)->last; + } + */ + + public function testDetach() { + $file = DATA_FILE; + $qp = qp($file, 'li'); + $start = $qp->size(); + $finish = $qp->detach()->size(); + $this->assertEquals($start, $finish); + $this->assertEquals(0, $qp->find(':root li')->size()); + } + + public function testAttach() { + $file = DATA_FILE; + $qp = qp($file, 'li'); + $start = $qp->size(); + $finish = $qp->detach()->size(); + $dest = qp('', 'dest'); + $qp = $qp->attach($dest); + $this->assertEquals(5, $dest->find(':root li')->size()); + } + + public function testEmptyElement() { + $file = DATA_FILE; + $this->assertEquals(0, qp($file, '#inner-two')->emptyElement()->find('li')->size()); + $this->assertEquals('', qp($file, '#inner-two')->emptyElement()->html()); + + // Make sure text children get wiped out, too. + $this->assertEquals('', qp($file, 'foot')->emptyElement()->text()); + } + + public function testHas() { + $file = DATA_FILE; + + // Test with DOMNode object + $qp = qp($file, 'foot'); + $selector = $qp->get(0); + $qp = $qp->top('root')->has($selector); + + // This should have one element named 'root'. + $this->assertEquals(1, $qp->size(), 'One element is a parent of foot'); + $this->assertEquals('root', $qp->tag(), 'Root has foot.'); + + // Test with CSS selector + $qp = qp($file, 'root')->has('foot'); + + // This should have one element named 'root'. + $this->assertEquals(1, $qp->size(), 'One element is a parent of foot'); + $this->assertEquals('root', $qp->tag(), 'Root has foot.'); + + // Test multiple matches. 
+ $qp = qp($file, '#docRoot, #inner-two')->has('#five'); + $this->assertEquals(2, $qp->size(), 'Two elements are parents of #five'); + $this->assertEquals('inner', $qp->get(0)->tagName, 'Inner has li.'); + + /* + $this->assertEquals(qp($file, '#one')->children()->get(), qp($file, '#inner-one')->has($selector)->get(), "Both should be empty/false"); + $qp = qp($file, 'root')->children("inner"); + $selector = qp($file, '#two'); + $this->assertNotEquals(qp($file, '#head'), qp($file, '#inner-one')->has($selector)); + $this->assertEquals(qp($file, 'root'), qp($file, 'root')->has($selector), "Should both have 1 element - root"); + */ + } + + public function testNextUntil() { + $file = DATA_FILE; + $this->assertEquals(3, qp($file, '#one')->nextUntil()->size()); + $this->assertEquals(2, qp($file, 'li')->nextUntil('#three')->size()); + } + + public function testPrevUntil() { + $file = DATA_FILE; + $this->assertEquals(3, qp($file, '#four')->prevUntil()->size()); + $this->assertEquals(2, qp($file, 'foot')->prevUntil('unary')->size()); + } + + public function testEven() { + $file = DATA_FILE; + $this->assertEquals(1, qp($file, 'inner')->even()->size()); + $this->assertEquals(2, qp($file, 'li')->even()->size()); + } + + public function testOdd() { + $file = DATA_FILE; + $this->assertEquals(1, qp($file, 'inner')->odd()->size()); + $this->assertEquals(3, qp($file, 'li')->odd()->size()); + } + + public function testFirst() { + $file = DATA_FILE; + $this->assertEquals(1, qp($file, 'inner')->first()->size()); + $this->assertEquals(1, qp($file, 'li')->first()->size()); + $this->assertEquals("Hello", qp($file, 'li')->first()->text()); + } + + public function testFirstChild() { + $file = DATA_FILE; + $this->assertEquals(1, qp($file, '#inner-one')->firstChild()->size()); + $this->assertEquals("Hello", qp($file, '#inner-one')->firstChild()->text()); + } + + public function testLast() { + $file = DATA_FILE; + $this->assertEquals(1, qp($file, 'inner')->last()->size()); + 
$this->assertEquals(1, qp($file, 'li')->last()->size()); + $this->assertEquals('', qp($file, 'li')->last()->text()); + } + + public function testLastChild() { + $file = DATA_FILE; + $this->assertEquals(1, qp($file, '#inner-one')->lastChild()->size()); + $this->assertEquals("Last", qp($file, '#inner-one')->lastChild()->text()); + } + + public function testParentsUntil() { + $file = DATA_FILE; + + // Three: two inners and a root. + $this->assertEquals(3, qp($file, 'li')->parentsUntil()->size()); + $this->assertEquals(2, qp($file, 'li')->parentsUntil('root')->size()); + } + + public function testSort() { + $xml = '1521'; + + // Canary. + $qp = qp($xml, 'i'); + $expect = array(1, 5, 2, 1); + foreach($qp as $item) { + $this->assertEquals(array_shift($expect), $item->text()); + } + + // Test simple ordering. + $comp = function (\DOMNode $a, \DOMNode $b) { + if ($a->textContent == $b->textContent) { + return 0; + } + return $a->textContent > $b->textContent ? 1 : -1; + }; + $qp = qp($xml, 'i')->sort($comp); + $expect = array(1, 1, 2, 5); + foreach($qp as $item) { + $this->assertEquals(array_shift($expect), $item->text()); + } + + $comp = function (\DOMNode $a, \DOMNode $b) { + $qpa = qp($a); + $qpb = qp($b); + + if ($qpa->text() == $qpb->text()) { + return 0; + } + return $qpa->text()> $qpb->text()? 1 : -1; + }; + $qp = qp($xml, 'i')->sort($comp); + $expect = array(1, 1, 2, 5); + foreach($qp as $item) { + $this->assertEquals(array_shift($expect), $item->text()); + } + + // Test DOM re-ordering + $comp = function (\DOMNode $a, \DOMNode $b) { + if ($a->textContent == $b->textContent) { + return 0; + } + return $a->textContent > $b->textContent ? 1 : -1; + }; + $qp = qp($xml, 'i')->sort($comp, TRUE); + $expect = array(1, 1, 2, 5); + foreach($qp as $item) { + $this->assertEquals(array_shift($expect), $item->text()); + } + $res = $qp->top()->xml(); + $expect_xml = '1125'; + $this->assertXmlStringEqualsXmlString($expect_xml, $res); + } + + /** + * Regression test for issue #14. 
+ */ + public function testRegressionFindOptimizations() { + $xml = ' + + + Test + + + '; + + // From inside, should not be able to find outside. + $this->assertEquals(0, qp($xml, '#inside')->find('#outside')->size()); + + $xml = ' + + + Test + + + '; + // From inside, should not be able to find outside. + $this->assertEquals(0, qp($xml, '.inside')->find('.outside')->size()); + } + + public function testDataURL() { + + $text = 'Hi!'; // Base-64 encoded value would be SGkh + $xml = ''; + + $qp = qp($xml, 'item')->dataURL('secret', $text, 'text/plain'); + + $this->assertEquals(1, $qp->top('item[secret]')->size(), 'One attr should be added.'); + + $this->assertEquals('data:text/plain;base64,SGkh', $qp->attr('secret'), 'Attr value should be data URL.'); + + $result = $qp->dataURL('secret'); + $this->assertEquals(2, count($result), 'Should return two-array.'); + $this->assertEquals($text, $result['data'] , 'Should return original data, decoded.'); + $this->assertEquals('text/plain', $result['mime'], 'Should return the original MIME'); + } + + public function testEncodeDataURL() { + $data = \QueryPath::encodeDataURL('Hi!', 'text/plain'); + $this->assertEquals('data:text/plain;base64,SGkh', $data); + } +} + +/** + * A simple mock for testing qp()'s abstract factory. + * + * @ingroup querypath_tests + */ +class QueryPathExtended extends DOMQuery { + public $foo = 'bar'; + public function foonator() { + return TRUE; + } +} diff --git a/lib/querypath/test/Tests/QueryPath/EntitiesTest.php b/lib/querypath/test/Tests/QueryPath/EntitiesTest.php new file mode 100644 index 0000000..62a3426 --- /dev/null +++ b/lib/querypath/test/Tests/QueryPath/EntitiesTest.php @@ -0,0 +1,54 @@ + + * @license The GNU Lesser GPL (LGPL) or an MIT-like license. + */ +namespace QueryPath\Tests; +require_once __DIR__ . 
'/TestCase.php'; + +/** + * @ingroup querypath_tests + */ +class EntitiesTest extends TestCase { + public function testReplaceEntity() { + $entity = 'amp'; + $this->assertEquals('38', \QueryPath\Entities::replaceEntity($entity)); + + $entity = 'lceil'; + $this->assertEquals('8968', \QueryPath\Entities::replaceEntity($entity)); + } + + public function testReplaceAllEntities() { + $test = '&©&& nothing.'; + $expect = '&©&& nothing.'; + $this->assertEquals($expect, \QueryPath\Entities::replaceAllEntities($test)); + + $test = '&&& '; + $expect = '&&& '; + $this->assertEquals($expect, \QueryPath\Entities::replaceAllEntities($test)); + + $test = "é\n"; + $expect = "é\n"; + $this->assertEquals($expect, \QueryPath\Entities::replaceAllEntities($test)); + } + + public function testReplaceHexEntities() { + $test = '©'; + $expect = '©'; + $this->assertEquals($expect, \QueryPath\Entities::replaceAllEntities($test)); + } + + public function testQPEntityReplacement() { + $test = '&©&& nothing.'; + /*$expect = '&©&& nothing.';*/ + // We get this because the DOM serializer re-converts entities. + $expect = ' +&©&& nothing.'; + + $qp = qp($test, NULL, array('replace_entities' => TRUE)); + // Interestingly, the XML serializer converts decimal to hex and ampersands + // to &. + $this->assertEquals($expect, trim($qp->xml())); + } +} diff --git a/lib/querypath/test/Tests/QueryPath/ExtensionTest.php b/lib/querypath/test/Tests/QueryPath/ExtensionTest.php new file mode 100644 index 0000000..5f98612 --- /dev/null +++ b/lib/querypath/test/Tests/QueryPath/ExtensionTest.php @@ -0,0 +1,153 @@ + + * @license The GNU Lesser GPL (LGPL) or an MIT-like license. + */ +namespace QueryPath\Tests; +//require_once 'PHPUnit/Autoload.php'; +require_once __DIR__ . '/TestCase.php'; +require_once __DIR__ . '/../../../src/QueryPath/Extension.php'; +//require_once __DIR__ . 
'/../../../src/QueryPath.php'; +//require_once 'QueryPathTest.php'; + +use \QueryPath\Extension; +use \QueryPath\ExtensionRegistry; + +/** + * + */ +//define('self::DATA_FILE', 'test/data.xml'); + +/** + * Run all of the usual tests, plus some extras, with some extensions loaded. + * @ingroup querypath_tests + * @group extension + */ +class QueryPathExtensionTest extends TestCase { + + public static function setUpBeforeClass() { + ExtensionRegistry::extend('\QueryPath\Tests\StubExtensionOne'); + ExtensionRegistry::extend('\QueryPath\Tests\StubExtensionTwo'); + } + + public function testExtensions() { + $this->assertNotNull(qp()); + } + + public function testHasExtension() { + $this->assertTrue(ExtensionRegistry::hasExtension('\QueryPath\Tests\StubExtensionOne')); + } + + public function testStubToe() { + $this->assertEquals(1, qp(self::DATA_FILE, 'unary')->stubToe()->top(':root > toe')->size()); + } + + public function testStuble() { + $this->assertEquals('arg1arg2', qp(self::DATA_FILE)->stuble('arg1', 'arg2')); + } + + /** + * @expectedException \QueryPath\Exception + */ + public function testNoRegistry() { + ExtensionRegistry::$useRegistry = FALSE; + try { + qp(self::DATA_FILE)->stuble('arg1', 'arg2'); + } + catch (\QueryPath\Exception $e) { + ExtensionRegistry::$useRegistry = TRUE; + throw $e; + } + + } + + public function testExtend() { + $this->assertFalse(ExtensionRegistry::hasExtension('\QueryPath\Tests\StubExtensionThree')); + ExtensionRegistry::extend('\QueryPath\Tests\StubExtensionThree'); + $this->assertTrue(ExtensionRegistry::hasExtension('\QueryPath\Tests\StubExtensionThree')); + } + + public function tearDown() { + ExtensionRegistry::$useRegistry = TRUE; + } + + /** + * @expectedException \QueryPath\Exception + */ + public function testAutoloadExtensions() { + // FIXME: This isn't really much of a test. 
+ ExtensionRegistry::autoloadExtensions(FALSE); + try { + qp()->stubToe(); + } + catch (Exception $e) { + ExtensionRegistry::autoloadExtensions(TRUE); + throw $e; + } + } + + /** + * @expectedException \QueryPath\Exception + */ + public function testCallFailure() { + qp()->foo(); + } + + // This does not (and will not) throw an exception. + // /** + // * @expectedException QueryPathException + // */ + // public function testExtendNoSuchClass() { + // ExtensionRegistry::extend('StubExtensionFour'); + // } + +} +// Create a stub extension: +/** + * Create a stub extension + * + * @ingroup querypath_tests + */ +class StubExtensionOne implements Extension { + private $qp = NULL; + public function __construct(\QueryPath\Query $qp) { + $this->qp = $qp; + } + + public function stubToe() { + $this->qp->top()->append('')->end(); + return $this->qp; + } +} +/** + * Create a stub extension + * + * @ingroup querypath_tests + */ +class StubExtensionTwo implements Extension { + private $qp = NULL; + public function __construct(\QueryPath\Query $qp) { + $this->qp = $qp; + } + public function stuble($arg1, $arg2) { + return $arg1 . $arg2; + } +} +/** + * Create a stub extension + * + * @ingroup querypath_tests + */ +class StubExtensionThree implements Extension { + private $qp = NULL; + public function __construct(\QueryPath\Query $qp) { + $this->qp = $qp; + } + public function stuble($arg1, $arg2) { + return $arg1 . $arg2; + } +} + +//ExtensionRegistry::extend('StubExtensionOne'); +//ExtensionRegistry::extend('StubExtensionTwo'); diff --git a/lib/querypath/test/Tests/QueryPath/Extensions/QPXMLTest.php b/lib/querypath/test/Tests/QueryPath/Extensions/QPXMLTest.php new file mode 100644 index 0000000..266003e --- /dev/null +++ b/lib/querypath/test/Tests/QueryPath/Extensions/QPXMLTest.php @@ -0,0 +1,41 @@ + + * @license The GNU Lesser GPL (LGPL) or an MIT-like license. + */ +namespace QueryPath\Tests; + +//require_once 'PHPUnit/Autoload.php'; +require_once __DIR__ . 
'/../TestCase.php'; +require_once 'src/QueryPath/Extension/QPXML.php'; +/** + * @ingroup querypath_tests + * @group extension + */ +class QPXMLTests extends TestCase { + + protected $file = './test/advanced.xml'; + public static function setUpBeforeClass() { + \QueryPath::enable('\QueryPath\Extension\QPXML'); + } + + public function testCDATA() { + $this->assertEquals('This is a CDATA section.', qp($this->file, 'first')->cdata()); + + $msg = 'Another CDATA Section'; + $this->assertEquals($msg, qp($this->file, 'second')->cdata($msg)->top()->find('second')->cdata()); + } + + public function testComment(){ + $this->assertEquals('This is a comment.', trim(qp($this->file, 'root')->comment())); + $msg = "Message"; + $this->assertEquals($msg, qp($this->file, 'second')->comment($msg)->top()->find('second')->comment()); + } + + public function testProcessingInstruction() { + $this->assertEquals('This is a processing instruction.', trim(qp($this->file, 'third')->pi())); + $msg = "Message"; + $this->assertEquals($msg, qp($this->file, 'second')->pi('qp', $msg)->top()->find('second')->pi()); + } +} diff --git a/lib/querypath/test/Tests/QueryPath/Extensions/QPXSLTest.php b/lib/querypath/test/Tests/QueryPath/Extensions/QPXSLTest.php new file mode 100644 index 0000000..f0c5ed1 --- /dev/null +++ b/lib/querypath/test/Tests/QueryPath/Extensions/QPXSLTest.php @@ -0,0 +1,60 @@ + + * @license The GNU Lesser GPL (LGPL) or an MIT-like license. + */ + +namespace QueryPath\Tests; + +//require_once 'PHPUnit/Autoload.php'; +require_once 'src/QueryPath/Extension/QPXSL.php'; +require_once __DIR__ . 
'/../TestCase.php'; +/** + * @ingroup querypath_tests + * @extension + */ +class QPXSLTests extends TestCase { + + protected $file = './test/advanced.xml'; + + public static function setUpBeforeClass() { + \QueryPath::enable('\QueryPath\Extension\QPXSL'); + } + public function testXSLT() { + // XML and XSLT taken from http://us.php.net/manual/en/xsl.examples-collection.php + // and then modified to be *actually welformed* XML. + $orig = ' + + Fight for your mind + Ben Harper + 1995 + + + Electric Ladyland + Jimi Hendrix + 1997 + + '; + + $template = ' + + + +
    + Hey! Welcome to \'s sweet CD collection! + +
    +
    + +

    +

    by -

    +
    +
    +
    + '; + + $qp = qp($orig)->xslt($template); + $this->assertEquals(2, $qp->top('h1')->size(), 'Make sure that data was formatted'); + } +} diff --git a/lib/querypath/test/Tests/QueryPath/OptionsTest.php b/lib/querypath/test/Tests/QueryPath/OptionsTest.php new file mode 100644 index 0000000..d2efbc9 --- /dev/null +++ b/lib/querypath/test/Tests/QueryPath/OptionsTest.php @@ -0,0 +1,60 @@ + + * @license The GNU Lesser GPL (LGPL) or an MIT-like license. + */ +namespace QueryPath\Tests; +require_once __DIR__ . '/TestCase.php'; +use \QueryPath\Options; + +/** + * @ingroup querypath_tests + */ +class OptionsTest extends TestCase { + + public function testOptions() { + $expect = array('test1' => 'val1', 'test2' => 'val2'); + $options = array('test1' => 'val1', 'test2' => 'val2'); + + Options::set($options); + + $results = Options::get(); + $this->assertEquals($expect, $results); + + $this->assertEquals('val1', $results['test1']); + } + + public function testQPOverrideOrder() { + $expect = array('test1' => 'val3', 'test2' => 'val2'); + $options = array('test1' => 'val1', 'test2' => 'val2'); + + Options::set($options); + $qpOpts = qp(NULL, NULL, array('test1'=>'val3', 'replace_entities' => TRUE))->getOptions(); + + $this->assertEquals($expect['test1'], $qpOpts['test1']); + $this->assertEquals(TRUE, $qpOpts['replace_entities']); + $this->assertNull($qpOpts['parser_flags']); + $this->assertEquals($expect['test2'], $qpOpts['test2']); + } + + public function testQPHas() { + $options = array('test1' => 'val1', 'test2' => 'val2'); + + Options::set($options); + $this->assertTrue(Options::has('test1')); + $this->assertFalse(Options::has('test3')); + } + public function testQPMerge() { + $options = array('test1' => 'val1', 'test2' => 'val2'); + $options2 = array('test1' => 'val3', 'test4' => 'val4'); + + Options::set($options); + Options::merge($options2); + + $results = Options::get(); + $this->assertTrue(Options::has('test4')); + $this->assertEquals('val3', $results['test1']); + } 
+ +} diff --git a/lib/querypath/test/Tests/QueryPath/QueryPathTest.php b/lib/querypath/test/Tests/QueryPath/QueryPathTest.php new file mode 100644 index 0000000..71fd225 --- /dev/null +++ b/lib/querypath/test/Tests/QueryPath/QueryPathTest.php @@ -0,0 +1,56 @@ +assertInstanceOf('\QueryPath\DOMQuery', $qp); + + } + + public function testWithHTML() { + $qp = \QueryPath::with(\QueryPath::HTML_STUB); + + $this->assertInstanceOf('\QueryPath\DOMQuery', $qp); + } + public function testWithHTML5() { + $qp = \QueryPath::withHTML5(\QueryPath::HTML5_STUB); + + $this->assertInstanceOf('\QueryPath\DOMQuery', $qp); + } + + public function testWithXML() { + $qp = \QueryPath::with(\QueryPath::XHTML_STUB); + + $this->assertInstanceOf('\QueryPath\DOMQuery', $qp); + } + + public function testEnable() { + \QueryPath::enable('\QueryPath\Tests\DummyExtension'); + + $qp = \QueryPath::with(\QueryPath::XHTML_STUB); + + $this->assertTrue($qp->grrrrrrr()); + + } + +} + +class DummyExtension implements \QueryPath\Extension { + + public function __construct(\QueryPath\Query $qp) { + $this->qp = $qp; + } + + public function grrrrrrr() { + return TRUE; + } + +} diff --git a/lib/querypath/test/Tests/QueryPath/TestCase.php b/lib/querypath/test/Tests/QueryPath/TestCase.php new file mode 100644 index 0000000..7e212b0 --- /dev/null +++ b/lib/querypath/test/Tests/QueryPath/TestCase.php @@ -0,0 +1,22 @@ + + * @license The GNU Lesser GPL (LGPL) or an MIT-like license. + */ + +namespace QueryPath\Tests; + +/** @addtogroup querypath_tests Tests + * Unit tests and regression tests for QueryPath. + */ + +/** */ +//require_once 'PHPUnit/Autoload.php'; +require_once __DIR__ . '/TestCase.php'; + +/** + * Test the XMLish functions of QueryPath. + * + * This uses a testing harness, XMLishMock, to test + * a protected method of QueryPath. 
+ * + * @ingroup querypath_test + */ +class XMLishTest extends TestCase { + public function testXMLishMock() { + $tests = array( + 'this/is/a/path' => FALSE, + "this is just some plain\ntext with a line break." => FALSE, + '2 > 1' => FALSE, + '1 < 2' => FALSE, + //'1 < 2 > 1' => FALSE, + '' => TRUE, + '' => TRUE, + '' => TRUE, // It's not valid, but HTML parser will try it. + ); + foreach ($tests as $test => $correct) { + $mock = new XMLishMock(); + $this->assertEquals($correct, $mock->exposedIsXMLish($test), "Testing $test"); + } + } + + public function testXMLishWithBrokenHTML() { + $html = '
    Abe H. Rosenbloom Field

    Located in a natural bowl north of 10th Avenue, Rosenbloom Field was made possible by a gift from Virginia Whitney Rosenbloom \'36 and Abe H. Rosenbloom \'34. The Pioneers observed the occasion of the field\'s dedication on Oct. 4, 1975, by defeating Carleton 36-26. Rosenbloom Field has a seating capacity of 1,500.

    A former member of the Grinnell Advisory Board and other college committees, Abe Rosenbloom played football at Grinnell from 1931 to 1933. He played guard and was one of the Missouri Valley Conference\'s smallest gridders (5\'6" and 170 pounds). He averaged more than 45 minutes a game playing time during a 24-game varsity career and was named to the Des Moines Register\'s all-Missouri Valley Conference squad in 1932 and 1933.

    On the south side of the field, a memorial recalls the 100th anniversary of the first intercollegiate football game played west of the Mississippi. The game took place on the Grinnell campus on Nov. 16, 1889. On the north side, a marker commemorates the first 50 years of football in the west, and recalls the same game, played in 1889, Grinnell College vs. the University of Iowa. Grinnell won, 24-0.

    '; + $mock = new XMLishMock(); + $this->assertEquals(TRUE, $mock->exposedIsXMLish($html), "Testing broken HTML"); + } + +} + +/** + * A testing class for XMLish tests. + * + * @ingroup querypath_tests + */ +class XMLishMock extends \QueryPath\DOMQuery { + public function exposedIsXMLish($str) { + return $this->isXMLish($str); + } +} diff --git a/lib/querypath/test/advanced.xml b/lib/querypath/test/advanced.xml new file mode 100644 index 0000000..e6c2eb2 --- /dev/null +++ b/lib/querypath/test/advanced.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lib/querypath/test/amplify.xml b/lib/querypath/test/amplify.xml new file mode 100644 index 0000000..70638b0 --- /dev/null +++ b/lib/querypath/test/amplify.xml @@ -0,0 +1,4370 @@ + + + + + + + + + Computers + 18.000000 + + + + + Computer programming + 11.000000 + + + + + debugger + 6.000000 + + + + Neutral + 0.000000 + + + Positive + 0.397979 + + + Positive + 0.397979 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + script + 5.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + + + + + + + RSS + 53.000000 + + + + Negative + -0.066667 + + + Positive + 0.666667 + + + Positive + 1.000000 + + + + To Some Extent + 2.000000 + + + To Some Extent + 2.000000 + + + + + feed + 31.000000 + + + + Negative + -0.250000 + + + Positive + 0.511993 + + + Positive + 1.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Apache + 31.000000 + + + + Negative + -0.300000 + + + Positive + 0.378744 + + + Positive + 0.845782 + + + + A Lot + 3.000000 + + + To Some Extent + 2.000000 + + + + + IE + 29.000000 + + + + Negative + -0.400000 + + + Positive + 0.200000 + + + Positive + 0.800000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Xmlhttprequest + 21.000000 + + + + Negative + -0.900000 + + + Negative + -0.050000 + + + Positive + 
0.800000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + mime type + 20.000000 + + + + Negative + -1.000000 + + + Negative + -0.220367 + + + Positive + 0.738899 + + + + To Some Extent + 2.000000 + + + To Some Extent + 2.000000 + + + + + Drupal + 19.000000 + + + + Negative + -0.066667 + + + Positive + 0.898987 + + + Positive + 1.000000 + + + + To Some Extent + 2.000000 + + + To Some Extent + 2.000000 + + + + + XML + 15.000000 + + + + Negative + -0.400000 + + + Positive + 0.200000 + + + Positive + 0.800000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + object + 15.000000 + + + + Neutral + 0.000000 + + + Positive + 0.800000 + + + Positive + 0.800000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + request + 14.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Positive + 0.150000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Javascript + 10.000000 + + + + Negative + -0.066667 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + To Some Extent + 2.000000 + + + To Some Extent + 2.000000 + + + + + mod_rewrite + 9.000000 + + + + Negative + -0.300000 + + + Positive + 0.233333 + + + Positive + 0.500000 + + + + To Some Extent + 2.000000 + + + To Some Extent + 2.000000 + + + + + AJAX + 9.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + error + 8.000000 + + + + Negative + -0.795959 + + + Neutral + 0.000000 + + + Positive + 0.795959 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + application + 8.000000 + + + + Neutral + 0.000000 + + + Positive + 0.900000 + + + Positive + 1.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + rewrite rule + 8.000000 + + + + Neutral + 0.000000 + + + Positive + 0.859900 + + + Positive + 0.859900 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + IE 
Xmlhttprequest + 7.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + Firefox + 7.000000 + + + + Negative + -1.000000 + + + Negative + -1.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + server + 7.000000 + + + + Negative + -0.250000 + + + Positive + 0.125000 + + + Positive + 0.500000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Technosophos + 7.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + header + 7.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + data inline + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + debugger + 6.000000 + + + + Neutral + 0.000000 + + + Positive + 0.397979 + + + Positive + 0.397979 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + line + 6.000000 + + + + Negative + -1.000000 + + + Neutral + 0.000000 + + + Positive + 1.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + configuration + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + jQuery + 6.000000 + + + + Negative + -0.066667 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + header x-requested-with + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + 2.x + 6.000000 + + + + Negative + -0.300000 + + + Negative + -0.300000 + + + Neutral + 0.000000 + + + + To 
Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + widget + 6.000000 + + + + Negative + -0.300000 + + + Negative + -0.300000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + A Lot + 3.000000 + + + + + Content-type + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + HTTP + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Jscript + 6.000000 + + + + Neutral + 0.000000 + + + Positive + 0.397979 + + + Positive + 0.397979 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + XMLHttpRequest + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + X-requested-with + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + xml + 5.000000 + + + + Neutral + 0.000000 + + + Positive + 0.600000 + + + Positive + 0.600000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + research + 5.000000 + + + + Negative + -0.400000 + + + Negative + -0.400000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + htaccess file + 5.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + ie javascript jquery mime mod_rewrite programming rss xmlhttprequest + 5.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + javascript jquery mac mamp os x pecl + 5.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 
1.000000 + + + + + tag + 5.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + safari + 5.000000 + + + + Negative + -0.600000 + + + Negative + -0.600000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + matt blog + 5.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + code + 5.000000 + + + + Negative + -0.066667 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + script + 5.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + other + 4.000000 + + + + Negative + -0.250000 + + + Negative + -0.250000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + web server + 4.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + symptom + 4.000000 + + + + Negative + -0.100508 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + solution + 4.000000 + + + + Neutral + 0.000000 + + + Positive + 0.347939 + + + Positive + 0.347939 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + server restart + 4.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + + xml + 4.000000 + + + + Negative + -0.400000 + + + Negative + -0.400000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + display area + 4.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + 
+ Not At All + 1.000000 + + + + + Querypath Twitterpated IE Xmlhttprequest + 4.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + RSS Javascript + 4.000000 + + + + Negative + -0.150000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + A Lot + 3.000000 + + + + + behavior + 4.000000 + + + + Negative + -0.400000 + + + Negative + -0.400000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Aleph-null + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + webserver + 3.000000 + + + + Negative + -0.900000 + + + Negative + -0.613015 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Http:x-requested-with + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + Rewriterule + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + Solution + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + T=text + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + Webclips + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + rss+xml + 3.000000 + + + + Neutral + 0.000000 + + + Positive + 0.800000 + + + Positive + 0.800000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + ^ + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + 
Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + ^XMLHttpRequest $ + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + aleph-null.tv + 3.000000 + + + + Negative + -0.150000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + A Lot + 3.000000 + + + + + apache drupal + 2.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + occasion + 2.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + text + 2.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + configuration file + 2.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + A Lot + 3.000000 + + + Not At All + 1.000000 + + + + + matt + 2.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + failure + 1.000000 + + + + Negative + -0.100508 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + extension rss + 1.000000 + + + + Neutral + 0.000000 + + + Positive + 0.300000 + + + Positive + 0.300000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + jQuery use jQuery + 1.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + English + 1.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + description + 1.000000 + + + + Negative + 
-0.465534 + + + Negative + -0.465534 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + content type setting + 1.000000 + + + + Neutral + 0.000000 + + + Positive + 1.000000 + + + Positive + 1.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + line turn + 1.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + client + 1.000000 + + + + Negative + -0.300000 + + + Negative + -0.300000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + mime type text + 1.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Positive + 0.075000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + rss|xml|rdf + 1.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + rss+xml mime type + 1.000000 + + + + Neutral + 0.000000 + + + Positive + 1.000000 + + + Positive + 1.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + box + 1.000000 + + + + Neutral + 0.000000 + + + Positive + 0.347939 + + + Positive + 0.347939 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + rewrite engine + 1.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + rescue + 1.000000 + + + + Neutral + 0.000000 + + + Positive + 0.845782 + + + Positive + 0.845782 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + rdf + 1.000000 + + + + Neutral + 0.000000 + + + Positive + 0.600000 + + + Positive + 0.600000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + + + RSS + 53.000000 + + + + Negative + -0.066667 + + + Positive + 0.666667 + + + Positive + 1.000000 + + + + To Some Extent + 2.000000 + + + To Some Extent + 
2.000000 + + + + + Apache + 31.000000 + + + + Negative + -0.300000 + + + Positive + 0.378744 + + + Positive + 0.845782 + + + + A Lot + 3.000000 + + + To Some Extent + 2.000000 + + + + + IE + 29.000000 + + + + Negative + -0.400000 + + + Positive + 0.200000 + + + Positive + 0.800000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Xmlhttprequest + 21.000000 + + + + Negative + -0.900000 + + + Negative + -0.050000 + + + Positive + 0.800000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Drupal + 19.000000 + + + + Negative + -0.066667 + + + Positive + 0.898987 + + + Positive + 1.000000 + + + + To Some Extent + 2.000000 + + + To Some Extent + 2.000000 + + + + + XML + 15.000000 + + + + Negative + -0.400000 + + + Positive + 0.200000 + + + Positive + 0.800000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Javascript + 10.000000 + + + + Negative + -0.066667 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + To Some Extent + 2.000000 + + + To Some Extent + 2.000000 + + + + + AJAX + 9.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + IE Xmlhttprequest + 7.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + Firefox + 7.000000 + + + + Negative + -1.000000 + + + Negative + -1.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Technosophos + 7.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + HTTP + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Jscript + 6.000000 + + + + Neutral + 0.000000 + + + Positive + 0.397979 + + + Positive 
+ 0.397979 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + Content-type + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + XMLHttpRequest + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + X-requested-with + 6.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + Querypath Twitterpated IE Xmlhttprequest + 4.000000 + + + + Neutral + 0.000000 + + + Positive + 0.500000 + + + Positive + 0.500000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + RSS Javascript + 4.000000 + + + + Negative + -0.150000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + A Lot + 3.000000 + + + + + Aleph-null + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + Webclips + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + T=text + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + Solution + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + Rewriterule + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + Http:x-requested-with + 3.000000 + + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + Neutral + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + + + + + + feed 
TechnoSophos + 4.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + not work + 4.000000 + + + + other + -1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + write jQuery + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + work with other webservers + 3.000000 + + + + other + 1.000000 + + + + Medium Low + 2.000000 + + + + Future + 4.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + turn out + 3.000000 + + + + other + -1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + try the feed + 3.000000 + + + + other + -1.000000 + + + + Low + 1.000000 + + + + Past + 1.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + serve out its RSS feed + 3.000000 + + + + help + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + send the X-Requested-With + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + report problems + 3.000000 + + + + communicate + 1.000000 + + + + Low + 1.000000 + + + + Past + 1.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + replicate the error + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + refine the problem + 3.000000 + + + + other + 1.000000 + + + + Medium Low + 1.500000 + + + + NA + 0.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + put in Apache's configuration file + 3.000000 + + + + other + 1.000000 + + + + Medium Low + 
1.500000 + + + + Present + 3.000000 + + + + A Lot + 3.000000 + + + Not At All + 1.000000 + + + + + parse the contents + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + not use + 3.000000 + + + + use + -1.000000 + + + + Low + 1.000000 + + + + Future + 4.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + not treat files + 3.000000 + + + + other + -1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + match those requests + 3.000000 + + + + other + 1.000000 + + + + Medium Low + 1.500000 + + + + Present + 3.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + adjust the MIME type + 3.000000 + + + + other + 1.000000 + + + + Medium + 3.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + define by IE's XMLHTTPRequest object + 3.000000 + + + + other + 1.000000 + + + + Medium + 2.500000 + + + + Past + 1.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + emit no errors + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + feed + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + find the problem + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Future + 4.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + learn + 3.000000 + + + + learn + 1.000000 + + + + Low + 1.000000 + + + + Past + 1.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + get from the server + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 
1.000000 + + + + + fix + 3.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + be the problem + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + work out of the box + 2.000000 + + + + other + 1.000000 + + + + Medium + 2.500000 + + + + Future + 4.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + work + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Past + 1.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + confirm for me + 2.000000 + + + + communicate + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + load up to my server + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Past + 1.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + submit ie javascript jquery mime mod_rewrite programming rss xmlhttprequest + 2.000000 + + + + communicate + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + spit out RSS files + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + set the Content-type + 2.000000 + + + + other + 1.000000 + + + + Medium Low + 2.000000 + + + + NA + 0.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + devise out a solution + 2.000000 + + + + other + 1.000000 + + + + Medium + 2.500000 + + + + Future + 4.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + download + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Past + 1.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + require a server restart + 2.000000 + 
+ + + request + 1.000000 + + + + Low + 1.000000 + + + + Future + 4.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + download their feeds + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Past + 1.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + label drupal javascript jquery mac mamp os x pecl + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + handle the next part + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + Not At All + 1.000000 + + + + + feed ie javascript jquery mime mod_rewrite programming rss xmlhttprequest + 2.000000 + + + + other + 1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + Not At All + 1.000000 + + + To Some Extent + 2.000000 + + + + + not work IE 7 + 2.000000 + + + + other + -1.000000 + + + + Low + 1.000000 + + + + Present + 3.000000 + + + + To Some Extent + 2.000000 + + + Not At All + 1.000000 + + + + + + + Adult + -0.007067 + + + Neutral + 0.000000 + + + Secondary + 2.000000 + + + + + No Slang + 0.114679 + + + Not Very Flamboyant + 2.000000 + + + + diff --git a/lib/querypath/test/coverage.sh b/lib/querypath/test/coverage.sh new file mode 100644 index 0000000..b4d7316 --- /dev/null +++ b/lib/querypath/test/coverage.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +phpunit=/Applications/MAMP/bin/php5/bin/phpunit +$phpunit --coverage-html coverage Tests +rm "db/qpTest.db" +rm "db/qpTest2.db" \ No newline at end of file diff --git a/lib/querypath/test/data.html b/lib/querypath/test/data.html new file mode 100644 index 0000000..85d15c5 --- /dev/null +++ b/lib/querypath/test/data.html @@ -0,0 +1,15 @@ + + + + + + data + + + + + +

    This is the title

    + + diff --git a/lib/querypath/test/data.xml b/lib/querypath/test/data.xml new file mode 100644 index 0000000..b4eec32 --- /dev/null +++ b/lib/querypath/test/data.xml @@ -0,0 +1,15 @@ + + + + + +
  • Hello
  • +
  • +
  • +
  • Last
  • + + +
  • + + This is footer text. + \ No newline at end of file diff --git a/lib/querypath/test/html.xml b/lib/querypath/test/html.xml new file mode 100644 index 0000000..2d014b6 --- /dev/null +++ b/lib/querypath/test/html.xml @@ -0,0 +1,15 @@ + + + + + + data + + + + + +

    This is the title

    /**
     * Recording CssEventHandler used by the parser test script.
     *
     * Each handler callback wraps its arguments in a TestEvent and appends it to
     * an internal stack. The stack can then be dumped or compared against a list
     * of expected events.
     * @ingroup querypath_tests
     */
    class SimpleTestCssEventHandler implements CssEventHandler {
      /** Events received so far, in call order. */
      public $stack = NULL;
      /** Events the caller expects to receive. */
      public $expect = array();

      public function __construct() {
        $this->stack = array();
      }

      /**
       * Appends one event to the list of received events.
       */
      private function record($event) {
        $this->stack[] = $event;
      }

      /**
       * Prints one "Element <type>: <params>" line per event.
       */
      private function printEvents($events) {
        foreach ($events as $event) {
          printf("Element %d: %s\n", $event->eventType(), implode(',', $event->params()));
        }
      }

      /**
       * Returns the events received so far.
       */
      public function getStack() {
        return $this->stack;
      }

      /**
       * Prints the expected events followed by the events actually received.
       */
      public function dumpStack() {
        print "\nExpected:\n";
        $this->printEvents($this->expect);

        print "Got:\n";
        $this->printEvents($this->stack);
      }

      /**
       * Sets the list of events that success() compares against.
       */
      public function expects($stack) {
        $this->expect = $stack;
      }

      /**
       * Tells whether the received events equal the expected ones (loose comparison).
       */
      public function success() {
        $matches = ($this->expect == $this->stack);
        return $matches;
      }

      // Handler callbacks: each one records a TestEvent carrying the event type
      // constant followed by the callback's own arguments.

      public function elementID($id) {
        $this->record(new TestEvent(TestEvent::elementID, $id));
      }

      public function element($name) {
        $this->record(new TestEvent(TestEvent::element, $name));
      }

      public function elementNS($name, $namespace = NULL) {
        $this->record(new TestEvent(TestEvent::elementNS, $name, $namespace));
      }

      public function anyElement() {
        $this->record(new TestEvent(TestEvent::anyElement));
      }

      public function anyElementInNS($ns) {
        $this->record(new TestEvent(TestEvent::anyElementInNS, $ns));
      }

      public function elementClass($name) {
        $this->record(new TestEvent(TestEvent::elementClass, $name));
      }

      public function attribute($name, $value = NULL, $operation = CssEventHandler::isExactly) {
        $this->record(new TestEvent(TestEvent::attribute, $name, $value, $operation));
      }

      public function attributeNS($name, $ns, $value = NULL, $operation = CssEventHandler::isExactly) {
        $this->record(new TestEvent(TestEvent::attributeNS, $name, $ns, $value, $operation));
      }

      public function pseudoClass($name, $value = NULL) {
        $this->record(new TestEvent(TestEvent::pseudoClass, $name, $value));
      }

      public function pseudoElement($name) {
        $this->record(new TestEvent(TestEvent::pseudoElement, $name));
      }

      public function directDescendant() {
        $this->record(new TestEvent(TestEvent::directDescendant));
      }

      public function anyDescendant() {
        $this->record(new TestEvent(TestEvent::anyDescendant));
      }

      public function adjacent() {
        $this->record(new TestEvent(TestEvent::adjacent));
      }

      public function anotherSelector() {
        $this->record(new TestEvent(TestEvent::anotherSelector));
      }

      public function sibling() {
        $this->record(new TestEvent(TestEvent::sibling));
      }
    }
/**
 * Record of a single handler callback, used by the test CSS event handler.
 *
 * Stores the numeric event type together with every additional constructor
 * argument, and echoes both while it is being constructed.
 * @ingroup querypath_tests
 */
class TestEvent {
  const elementID = 0;
  const element = 1;
  const elementNS = 2;
  const anyElement = 3;
  const elementClass = 4;
  const attribute = 5;
  const attributeNS = 6;
  const pseudoClass = 7;
  const pseudoElement = 8;
  const directDescendant = 9;
  const adjacent = 10;
  const anotherSelector = 11;
  const sibling = 12;
  const anyElementInNS = 13;
  const anyDescendant = 14;

  /** One of the event type constants above. */
  public $type = NULL;
  /** Arguments passed after the event type, as a zero-based list. */
  public $params = NULL;

  /**
   * @param int $event_type
   *   Event type constant. Any further arguments are stored as parameters.
   */
  public function __construct($event_type) {
    // Everything after the first argument becomes the parameter list.
    $extra = array_slice(func_get_args(), 1);

    $this->type = $event_type;
    $this->params = $extra;

    // Trace output, emitted on every construction.
    echo "Event " . $event_type;
    print_r($extra);
  }

  /**
   * Returns the event type given to the constructor.
   */
  public function eventType() {
    return $this->type;
  }

  /**
   * Returns the extra constructor arguments.
   */
  public function params() {
    return $this->params;
  }
}
/**
 * Returns the body of a linked page, using an on-disk cache keyed by the
 * MD5 hash of the link.
 *
 * The cache file name is built by appending '.%s' to the value of
 * _getCachePath() and filling the result with the feed name and the hash.
 * NOTE(review): this presumes _getCachePath() returns a sprintf pattern with
 * one placeholder for the feed name - confirm against its definition.
 *
 * A failed download is no longer written to the cache. Previously the
 * `false` result was stored as an empty file and served on every later run.
 *
 * @param string $link URL of the page to fetch.
 * @return string|false Page body, or false when the download failed.
 */
protected function _getLinkContent($link) {
    $cacheFile = sprintf($this->_getCachePath() . '.%s', $this->_feed, md5($link));
    if (file_exists($cacheFile)) {
        return file_get_contents($cacheFile);
    }

    $content = file_get_contents($link);
    if ($content !== false) {
        file_put_contents($cacheFile, $content);
    }
    return $content;
}

/**
 * Collects the `property` => `content` pairs of all <meta> tags in a page.
 *
 * An `og:url` entry is dropped unless it points at motorsportmagazine.com
 * (see _isMagazineUrl()), so callers fall back to the original link.
 *
 * @param string|false $content HTML markup, e.g. the result of _getLinkContent().
 * @return array Map of meta property name to its content value; empty when
 *               there is no markup to inspect.
 */
protected function _getLinkMetadata($content) {
    $metadata = [];
    if (!is_string($content) || $content === '') {
        // Nothing was downloaded, so there is nothing to parse.
        return $metadata;
    }

    foreach (htmlqp($content)->find('meta') as $tag) {
        $attributes = $tag->attr();
        // Tags lacking either attribute carry no usable metadata.
        if (isset($attributes['property'], $attributes['content'])) {
            $metadata[$attributes['property']] = $attributes['content'];
        }
    }

    if (isset($metadata['og:url']) && !$this->_isMagazineUrl($metadata['og:url'])) {
        unset($metadata['og:url']);
    }
    return $metadata;
}

/**
 * Tells whether a URL's host is motorsportmagazine.com or one of its subdomains.
 *
 * The comparison is made on a label boundary, so look-alike hosts such as
 * "evilmotorsportmagazine.com" are rejected. URLs without a host are rejected too.
 *
 * @param string $url URL to inspect.
 * @return bool
 */
protected function _isMagazineUrl($url) {
    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        return false;
    }

    $host = strtolower($host);
    $domain = 'motorsportmagazine.com';
    $suffix = '.' . $domain;
    return $host === $domain || substr($host, -strlen($suffix)) === $suffix;
}

/**
 * Keeps only the items whose page metadata has an `article:published_time` entry.
 *
 * Array keys of the input are preserved.
 *
 * @param array $items Items carrying a `content` metadata array.
 * @return array Filtered items.
 */
protected function _spamFilter($items) {
    return array_filter(
        $items,
        function ($item) {
            return isset($item->content['article:published_time']);
        }
    );
}

/**
 * Converts collected link records into feed Item objects.
 *
 * When the page metadata has `article:published_time`, title, text and time
 * are taken from the metadata. Otherwise the record's own title and time are
 * used and the text is the page's `og:title`. Missing metadata keys are read
 * as null instead of raising undefined-index notices. A missing `og:title`
 * on an article falls back to the record's own title.
 *
 * @param array $content Records with `content`, `link`, `title`, `time` and
 *                       optionally `author`.
 * @return array List of Item objects.
 */
protected function _mapItems($content) {
    $items = [];
    foreach ($content as $i) {
        $meta = $i->content;
        $isArticle = isset($meta['article:published_time']);
        $ogTitle = isset($meta['og:title']) ? $meta['og:title'] : null;
        $url = isset($meta['og:url']) ? $meta['og:url'] : $i->link;

        if ($isArticle) {
            $title = ($ogTitle !== null) ? $ogTitle : $i->title;
            $text = isset($meta['og:description']) ? $meta['og:description'] : null;
            $time = $meta['article:published_time'];
        } else {
            $title = $i->title;
            $text = $ogTitle;
            $time = $i->time;
        }

        $item = new Item();
        $item->ID = md5($url);
        $item->Title = $title;
        $item->Link = $url;
        $item->Text = $text;
        $item->Time = $time;
        if (isset($i->author)) {
            $item->Author = $i->author;
        }
        $items[] = $item;
    }
    return $items;
}

/**
 * Returns the content in the order it was given; no sorting is applied.
 *
 * @param array $content
 * @return array The same content, unchanged.
 */
protected function _sortContent($content) {
    return $content;
}