summary | refs | log | tree | commit | diff
path: root/vendor/miniflux/picofeed/lib/PicoFeed/Parser/XmlParser.php
diff options
context:
space:
mode:
author Frederic Guillot <fred@kanboard.net> 2017-10-25 16:22:10 -0700
committer Frederic Guillot <fred@kanboard.net> 2017-10-25 16:22:10 -0700
commit 9e2b2a32fd0e967ad3184e9a5d091a29953acb91 (patch)
tree 00822e24aa1110c73ca455a8d096ef296c008cbc /vendor/miniflux/picofeed/lib/PicoFeed/Parser/XmlParser.php
parent c507c5416251c505cb3e088a03c6664bed73c812 (diff)
Include composer dependencies in repo
Diffstat (limited to 'vendor/miniflux/picofeed/lib/PicoFeed/Parser/XmlParser.php')
-rw-r--r-- vendor/miniflux/picofeed/lib/PicoFeed/Parser/XmlParser.php 246
1 files changed, 246 insertions, 0 deletions
diff --git a/vendor/miniflux/picofeed/lib/PicoFeed/Parser/XmlParser.php b/vendor/miniflux/picofeed/lib/PicoFeed/Parser/XmlParser.php
new file mode 100644
index 00000000..ea42949f
--- /dev/null
+++ b/vendor/miniflux/picofeed/lib/PicoFeed/Parser/XmlParser.php
@@ -0,0 +1,246 @@
+<?php
+
+namespace PicoFeed\Parser;
+
+use DOMDocument;
+use SimpleXMLElement;
+use ZendXml\Exception\RuntimeException;
+use ZendXml\Security;
+
+/**
+ * XML parser class.
+ *
+ * Checks for XML eXternal Entity (XXE) and XML Entity Expansion (XEE) attacks on XML documents
+ *
+ * @package PicoFeed\Parser
+ * @author Frederic Guillot
+ */
+class XmlParser
+{
+ /**
+  * Build a SimpleXMLElement from raw XML.
+  *
+  * Delegates to scan() without a DOMDocument argument and hands back
+  * whatever scan() produces.
+  *
+  * @static
+  * @param string $input XML content
+  * @return mixed Result of scan(): per its contract a SimpleXMLElement, or false
+  */
+ public static function getSimpleXml($input)
+ {
+     $element = self::scan($input);
+
+     return $element;
+ }
+
+ /**
+  * Build a DOMDocument from raw XML.
+  *
+  * @static
+  * @param string $input XML content
+  * @return DOMDocument|false False when the input is empty or when the scanned
+  *                           document has no child node; otherwise the value
+  *                           returned by scan()
+  */
+ public static function getDomDocument($input)
+ {
+     if (empty($input)) {
+         return false;
+     }
+
+     $document = self::scan($input, new DOMDocument());
+
+     // A document without any node most likely means that parsing went wrong
+     $hasNoNodes = $document && $document->childNodes->length === 0;
+
+     return $hasNoNodes ? false : $document;
+ }
+
+ /**
+  * Run the input through ZendXml's Security::scan() and translate its
+  * RuntimeException into a PicoFeed XmlEntityException.
+  *
+  * @static
+  * @access private
+  * @param string $input XML content
+  * @param DOMDocument|null $dom Optional document, forwarded as-is to Security::scan()
+  * @throws XmlEntityException When Security::scan() throws a ZendXml RuntimeException
+  * @return SimpleXMLElement|DomDocument|boolean
+  */
+ private static function scan($input, $dom = null)
+ {
+     try {
+         $scanned = Security::scan($input, $dom);
+     } catch (RuntimeException $error) {
+         // Only the message is carried over to the PicoFeed exception
+         throw new XmlEntityException($error->getMessage());
+     }
+
+     return $scanned;
+ }
+
+ /**
+  * Load a HTML document into a DOMDocument instance.
+  *
+  * A DOMDocument is always returned: it is left empty when the input is
+  * empty, and the result of loadHTML() is not inspected.
+  *
+  * Side effect: switches libxml to internal error handling for non-empty
+  * input and does not switch it back.
+  *
+  * @static
+  * @access public
+  * @param string $input HTML content
+  * @return DOMDocument
+  */
+ public static function getHtmlDocument($input)
+ {
+     $document = new DOMDocument();
+
+     if (! empty($input)) {
+         libxml_use_internal_errors(true);
+
+         // The options argument is only passed on PHP 5.4.0 and later
+         if (version_compare(PHP_VERSION, '5.4.0', '<')) {
+             $document->loadHTML($input);
+         } else {
+             $document->loadHTML($input, LIBXML_NONET);
+         }
+     }
+
+     return $document;
+ }
+
+ /**
+  * Convert a HTML document to XML.
+  *
+  * The HTML is prefixed with an XML-style declaration naming UTF-8 before
+  * being loaded with getHtmlDocument() (presumably to make libxml read the
+  * markup as UTF-8 - TODO confirm). The first <body> element of the loaded
+  * document is then serialized with saveXML().
+  *
+  * @static
+  * @access public
+  * @param string $html HTML document
+  * @return string
+  */
+ public static function htmlToXml($html)
+ {
+     $prefixed = '<?xml version="1.0" encoding="UTF-8">'.$html;
+     $document = self::getHtmlDocument($prefixed);
+     $body = $document->getElementsByTagName('body')->item(0);
+
+     return $document->saveXML($body);
+ }
+
+ /**
+  * Get XML parser errors.
+  *
+  * Formats every entry of libxml_get_errors() and joins them with ", ".
+  * The libxml error buffer is only read, not cleared.
+  *
+  * @static
+  * @access public
+  * @return string Empty string when libxml reports no error
+  */
+ public static function getErrors()
+ {
+     $messages = array_map(function ($error) {
+         return sprintf(
+             'XML error: %s (Line: %d - Column: %d - Code: %d)',
+             $error->message,
+             $error->line,
+             $error->column,
+             $error->code
+         );
+     }, libxml_get_errors());
+
+     return implode(', ', $messages);
+ }
+
+ /**
+  * Get the encoding from a xml tag.
+  *
+  * Only the XML declaration itself is inspected: the text between the first
+  * "<?xml" and the first declaration terminator that follows it. Single and
+  * double quoted values are both accepted, as is whitespace around "=".
+  *
+  * @static
+  * @access public
+  * @param string $data Input data
+  * @return string Lowercased encoding name, or an empty string when there is
+  *                no declaration, no terminator or no encoding attribute
+  */
+ public static function getEncodingFromXmlTag($data)
+ {
+     $start = strpos($data, '<?xml');
+
+     if ($start === false) {
+         return '';
+     }
+
+     // Stop at the first terminator after the opening marker, so attributes
+     // found later in the document body are never mistaken for the declaration
+     $end = strpos($data, '?'.'>', $start);
+
+     if ($end === false) {
+         return '';
+     }
+
+     // Normalise single quotes so that only one quote style has to be matched
+     $declaration = str_replace("'", '"', substr($data, $start, $end - $start));
+
+     if (preg_match('/encoding\s*=\s*"([^"]*)"/', $declaration, $match) === 1) {
+         return strtolower($match[1]);
+     }
+
+     return '';
+ }
+
+ /**
+  * Get the charset from a meta tag.
+  *
+  * Takes the first "charset=" value that follows a "<meta" occurrence; the
+  * value ends at a quote, whitespace, "/", ">" or ";".
+  *
+  * @static
+  * @access public
+  * @param string $data Input data
+  * @return string Lowercased charset, or an empty string when nothing matches
+  */
+ public static function getEncodingFromMetaTag($data)
+ {
+     $pattern = '/<meta.*?charset\s*=\s*["\']?\s*([^"\'\s\/>;]+)/i';
+     $found = preg_match($pattern, $data, $match) === 1;
+
+     return $found ? strtolower($match[1]) : '';
+ }
+
+ /**
+  * Rewrite XPath query to use namespace-uri and local-name derived from prefix.
+  *
+  * Every "prefix:name" token whose prefix is a key of $ns is replaced by
+  * *[namespace-uri()="<uri>" and local-name()="<name>"]. Tokens are left
+  * untouched when the prefix is the special prefix "xml" or when it has no
+  * entry in $ns (for instance "urn:foo" inside a string literal).
+  *
+  * Names may contain letters, digits, "_", "-" and ".", so element names
+  * such as "new-feed-url" are kept whole.
+  *
+  * @static
+  * @access public
+  * @param string $query XPath query
+  * @param array $ns Prefix to namespace URI mapping
+  * @return string
+  */
+ public static function replaceXPathPrefixWithNamespaceURI($query, array $ns)
+ {
+     $pattern = '/([A-Z0-9_][A-Z0-9_.-]*):([A-Z0-9_][A-Z0-9_.-]*)/iu';
+
+     return preg_replace_callback($pattern, function ($matches) use ($ns) {
+         list($qualifiedName, $prefix, $localName) = $matches;
+
+         // don't try to map the special prefix XML, nor a prefix without mapping
+         if (strtolower($prefix) === 'xml' || ! isset($ns[$prefix])) {
+             return $qualifiedName;
+         }
+
+         return '*[namespace-uri()="'.$ns[$prefix].'" and local-name()="'.$localName.'"]';
+     }, $query);
+ }
+
+ /**
+  * Get the result elements of a XPath query.
+  *
+  * When a prefix mapping is supplied, the query is first rewritten with
+  * replaceXPathPrefixWithNamespaceURI(); otherwise it is run unchanged.
+  *
+  * @static
+  * @access public
+  * @param SimpleXMLElement $xml XML element
+  * @param string $query XPath query
+  * @param array $ns Prefix to namespace URI mapping
+  * @return SimpleXMLElement[] Value returned by SimpleXMLElement::xpath()
+  */
+ public static function getXPathResult(SimpleXMLElement $xml, $query, array $ns = array())
+ {
+     $resolvedQuery = empty($ns)
+         ? $query
+         : static::replaceXPathPrefixWithNamespaceURI($query, $ns);
+
+     return $xml->xpath($resolvedQuery);
+ }
+
+ /**
+  * Get the first Xpath result or SimpleXMLElement value
+  *
+  * The value is cast to string and trimmed in both cases. Anything else
+  * (empty array, null, false, scalar...) yields an empty string.
+  *
+  * @static
+  * @access public
+  * @param mixed $value Array of results (only index 0 is read) or a SimpleXMLElement
+  * @return string
+  */
+ public static function getValue($value)
+ {
+     if (is_array($value) && count($value) > 0) {
+         return trim((string) $value[0]);
+     }
+
+     if ($value instanceof SimpleXMLElement) {
+         return trim((string) $value);
+     }
+
+     return '';
+ }
+}