summaryrefslogtreecommitdiff
path: root/framework/IO/SafeHtml/HTMLSax3
diff options
context:
space:
mode:
Diffstat (limited to 'framework/IO/SafeHtml/HTMLSax3')
-rw-r--r--framework/IO/SafeHtml/HTMLSax3/Decorators.php363
-rw-r--r--framework/IO/SafeHtml/HTMLSax3/States.php288
2 files changed, 651 insertions, 0 deletions
diff --git a/framework/IO/SafeHtml/HTMLSax3/Decorators.php b/framework/IO/SafeHtml/HTMLSax3/Decorators.php
new file mode 100644
index 00000000..6256706c
--- /dev/null
+++ b/framework/IO/SafeHtml/HTMLSax3/Decorators.php
@@ -0,0 +1,363 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+//
+// +----------------------------------------------------------------------+
+// | PHP Version 4 |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 1997-2002 The PHP Group |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 2.02 of the PHP license, |
+// | that is bundled with this package in the file LICENSE, and is |
+// | available at through the world-wide-web at |
+// | http://www.php.net/license/3_0.txt. |
+// | If you did not receive a copy of the PHP license and are unable to |
+// | obtain it through the world-wide-web, please send a note to |
+// | license@php.net so we can mail you a copy immediately. |
+// +----------------------------------------------------------------------+
+// | Authors: Alexander Zhukov <alex@veresk.ru> Original port from Python |
+// | Authors: Harry Fuecks <hfuecks@phppatterns.com> Port to PEAR + more |
+// | Authors: Many @ Sitepointforums Advanced PHP Forums |
+// +----------------------------------------------------------------------+
+//
+// $Id: Decorators.php,v 1.2 2005/12/22 11:09:09 weizhuo Exp $
+//
+/**
+* Decorators for dealing with parser options
+* @package System.Security.SafeHtml
+* @version $Id: Decorators.php,v 1.2 2005/12/22 11:09:09 weizhuo Exp $
+* @see TSax3::set_option
+*/
+/**
+* Data-handler decorator: strips whitespace from both ends of the element
+* data and only forwards the callback when something is left over
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_Trim {
+    /**
+    * Handler object being decorated
+    * @var object
+    * @access private
+    */
+    private $orig_obj;
+    /**
+    * Name of the data handler method on the decorated object
+    * @var string
+    * @access private
+    */
+    private $orig_method;
+    /**
+    * Remembers the decorated handler object and the method to forward to
+    * @param object handler object being decorated
+    * @param string original handler method
+    * @access protected
+    */
+    public function __construct(&$orig_obj, $orig_method) {
+        $this->orig_method = $orig_method;
+        // Bound by reference, exactly as the caller handed it in
+        $this->orig_obj =& $orig_obj;
+    }
+    /**
+    * Trims the data and passes it on; data that trims down to the empty
+    * string is dropped without calling the original handler
+    * @param TSax3
+    * @param string element data
+    * @access protected
+    */
+    public function trimData(&$parser, $data) {
+        $trimmed = trim($data);
+        if ($trimmed === '') {
+            return;
+        }
+        $this->orig_obj->{$this->orig_method}($parser, $trimmed);
+    }
+}
+/**
+* Element-handler decorator: converts tag names to upper case before the
+* open / close callbacks reach the original handler
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_CaseFolding {
+    /**
+    * Handler object being decorated
+    * @var object
+    * @access private
+    */
+    private $orig_obj;
+    /**
+    * Name of the original open-tag handler method
+    * @var string
+    * @access private
+    */
+    private $orig_open_method;
+    /**
+    * Name of the original close-tag handler method
+    * @var string
+    * @access private
+    */
+    private $orig_close_method;
+    /**
+    * Remembers the decorated handler object and both element callbacks
+    * @param object handler object being decorated
+    * @param string original open handler method
+    * @param string original close handler method
+    * @access protected
+    */
+    public function __construct(&$orig_obj, $orig_open_method, $orig_close_method) {
+        $this->orig_close_method = $orig_close_method;
+        $this->orig_open_method = $orig_open_method;
+        // Bound by reference, exactly as the caller handed it in
+        $this->orig_obj =& $orig_obj;
+    }
+    /**
+    * Forwards an open tag callback with the tag name upper-cased;
+    * attributes and the empty flag are passed through untouched
+    * @param TSax3
+    * @param string tag name
+    * @param array tag attributes
+    * @param boolean empty flag, forwarded as received
+    * @access protected
+    */
+    public function foldOpen(&$parser, $tag, $attrs=array(), $empty = FALSE) {
+        $open = $this->orig_open_method;
+        $this->orig_obj->$open($parser, strtoupper($tag), $attrs, $empty);
+    }
+    /**
+    * Forwards a close tag callback with the tag name upper-cased
+    * @param TSax3
+    * @param string tag name
+    * @param boolean empty flag, forwarded as received
+    * @access protected
+    */
+    public function foldClose(&$parser, $tag, $empty = FALSE) {
+        $close = $this->orig_close_method;
+        $this->orig_obj->$close($parser, strtoupper($tag), $empty);
+    }
+}
+/**
+* Data-handler decorator: splits the data on "\n" and invokes the original
+* data handler once per resulting piece
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_Linefeed {
+    /**
+    * Handler object being decorated
+    * @var object
+    * @access private
+    */
+    private $orig_obj;
+    /**
+    * Name of the data handler method on the decorated object
+    * @var string
+    * @access private
+    */
+    private $orig_method;
+    /**
+    * Remembers the decorated handler object and the method to forward to
+    * @param object handler object being decorated
+    * @param string original handler method
+    * @access protected
+    */
+    public function __construct(&$orig_obj, $orig_method) {
+        $this->orig_method = $orig_method;
+        // Bound by reference, exactly as the caller handed it in
+        $this->orig_obj =& $orig_obj;
+    }
+    /**
+    * Splits the data at every linefeed and forwards each piece in order
+    * (empty pieces included)
+    * @param TSax3
+    * @param string element data
+    * @access protected
+    */
+    public function breakData(&$parser, $data) {
+        $method = $this->orig_method;
+        foreach ( explode("\n", $data) as $line ) {
+            $this->orig_obj->$method($parser, $line);
+        }
+    }
+}
+/**
+* Breaks up data by tab characters, resulting in additional
+* calls to the data handler
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_Tab {
+    /**
+    * Original handler object
+    * @var object
+    * @access private
+    */
+    private $orig_obj;
+    /**
+    * Original handler method
+    * @var string
+    * @access private
+    */
+    private $orig_method;
+    /**
+    * Constructs TSax3_Tab
+    * @param object handler object being decorated
+    * @param string original handler method
+    * @access protected
+    */
+    function __construct(&$orig_obj, $orig_method) {
+        $this->orig_obj =& $orig_obj;
+        $this->orig_method = $orig_method;
+    }
+    /**
+    * Breaks the data up by tabs and forwards every piece (empty pieces
+    * included) to the original data handler
+    * @param TSax3
+    * @param string element data
+    * @access protected
+    */
+    function breakData(&$parser, $data) {
+        $data = explode("\t",$data);
+        foreach ( $data as $chunk ) {
+            // Hand the parser on to the handler, not this decorator, so the
+            // callback sees the same first argument as with the other decorators
+            $this->orig_obj->{$this->orig_method}($parser, $chunk);
+        }
+    }
+}
+/**
+* Breaks up data by XML entities and parses them with html_entity_decode(),
+* resulting in additional calls to the data handler<br />
+* Requires PHP 4.3.0+
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_Entities_Parsed {
+    /**
+    * Original handler object
+    * @var object
+    * @access private
+    */
+    private $orig_obj;
+    /**
+    * Original handler method
+    * @var string
+    * @access private
+    */
+    private $orig_method;
+    /**
+    * Constructs TSax3_Entities_Parsed
+    * @param object handler object being decorated
+    * @param string original handler method
+    * @access protected
+    */
+    function __construct(&$orig_obj, $orig_method) {
+        $this->orig_obj =& $orig_obj;
+        $this->orig_method = $orig_method;
+    }
+    /**
+    * Breaks the data up by XML entities; every piece (entity or plain
+    * text) is run through html_entity_decode() with ENT_NOQUOTES and then
+    * forwarded to the original data handler
+    * @param TSax3
+    * @param string element data
+    * @access protected
+    */
+    function breakData(&$parser, $data) {
+        // The capture group keeps the entities themselves in the result;
+        // empty pieces are discarded
+        $data = preg_split('/(&.+?;)/',$data,-1,PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
+        foreach ( $data as $chunk ) {
+            $chunk = html_entity_decode($chunk,ENT_NOQUOTES);
+            // Hand the parser on to the handler, not this decorator
+            $this->orig_obj->{$this->orig_method}($parser, $chunk);
+        }
+    }
+}
+/**
+* Fallback html_entity_decode() for PHP releases older than 4.3 that do
+* not provide the function themselves
+*/
+if (!function_exists('html_entity_decode') && version_compare(phpversion(), '4.3', '<')) {
+    function html_entity_decode($str, $style=ENT_NOQUOTES) {
+        // Invert the character => entity table so strtr() maps entities back
+        $entity_to_char = array_flip(get_html_translation_table(HTML_ENTITIES,$style));
+        return strtr($str, $entity_to_char);
+    }
+}
+/**
+* Breaks up data by XML entities but leaves them unparsed,
+* resulting in additional calls to the data handler<br />
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_Entities_Unparsed {
+    /**
+    * Original handler object
+    * @var object
+    * @access private
+    */
+    private $orig_obj;
+    /**
+    * Original handler method
+    * @var string
+    * @access private
+    */
+    private $orig_method;
+    /**
+    * Constructs TSax3_Entities_Unparsed
+    * @param object handler object being decorated
+    * @param string original handler method
+    * @access protected
+    */
+    function __construct(&$orig_obj, $orig_method) {
+        $this->orig_obj =& $orig_obj;
+        $this->orig_method = $orig_method;
+    }
+    /**
+    * Breaks the data up by XML entities and forwards every piece (entity
+    * or plain text) unchanged to the original data handler
+    * @param TSax3
+    * @param string element data
+    * @access protected
+    */
+    function breakData(&$parser, $data) {
+        // The capture group keeps the entities themselves in the result;
+        // empty pieces are discarded
+        $data = preg_split('/(&.+?;)/',$data,-1,PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
+        foreach ( $data as $chunk ) {
+            // Hand the parser on to the handler, not this decorator
+            $this->orig_obj->{$this->orig_method}($parser, $chunk);
+        }
+    }
+}
+
+/**
+* Strips the HTML comment markers or CDATA sections from an escape.
+* If XML_OPTIONS_FULL_ESCAPES is on, this decorator is not used.<br />
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_Escape_Stripper {
+    /**
+    * Original handler object
+    * @var object
+    * @access private
+    */
+    private $orig_obj;
+    /**
+    * Original handler method
+    * @var string
+    * @access private
+    */
+    private $orig_method;
+    /**
+    * Constructs TSax3_Escape_Stripper
+    * @param object handler object being decorated
+    * @param string original handler method
+    * @access protected
+    */
+    function __construct(&$orig_obj, $orig_method) {
+        $this->orig_obj =& $orig_obj;
+        $this->orig_method = $orig_method;
+    }
+    /**
+    * Removes the surrounding markers from an escape and forwards the rest
+    * to the original escape handler. Data starting with "--" is treated as
+    * a comment (leading and trailing "--" removed); data starting with "["
+    * is treated as a CDATA section (opening "[...CDATA...[" and the closing
+    * pair of "]" removed). Anything else is forwarded unchanged.
+    * @param TSax3
+    * @param string escape data
+    * @access protected
+    */
+    function strip(&$parser, $data) {
+        // Check for HTML comments first
+        if ( substr($data,0,2) == '--' ) {
+            $patterns = array(
+                '/^\-\-/', // Opening comment: --
+                '/\-\-$/', // Closing comment: --
+                );
+            $data = preg_replace($patterns,'',$data);
+
+        // Check for XML CDATA sections (note: don't do both!)
+        } else if ( substr($data,0,1) == '[' ) {
+            $patterns = array(
+                // Lazy quantifiers: stop at the first "[" after CDATA so that
+                // brackets inside the section's content are left alone
+                '/^\[.*?CDATA.*?\[/s', // Opening CDATA
+                // Only the final pair of "]" (nothing but non-"]" characters
+                // between them) is the closing marker
+                '/\][^\]]*\]$/', // Closing CDATA
+                );
+            $data = preg_replace($patterns,'',$data);
+        }
+
+        // Hand the parser on to the handler, not this decorator
+        $this->orig_obj->{$this->orig_method}($parser, $data);
+    }
+}
+?> \ No newline at end of file
diff --git a/framework/IO/SafeHtml/HTMLSax3/States.php b/framework/IO/SafeHtml/HTMLSax3/States.php
new file mode 100644
index 00000000..2b863a59
--- /dev/null
+++ b/framework/IO/SafeHtml/HTMLSax3/States.php
@@ -0,0 +1,288 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+//
+// +----------------------------------------------------------------------+
+// | PHP Version 4 |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 1997-2002 The PHP Group |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 2.02 of the PHP license, |
+// | that is bundled with this package in the file LICENSE, and is |
+// | available at through the world-wide-web at |
+// | http://www.php.net/license/3_0.txt. |
+// | If you did not receive a copy of the PHP license and are unable to |
+// | obtain it through the world-wide-web, please send a note to |
+// | license@php.net so we can mail you a copy immediately. |
+// +----------------------------------------------------------------------+
+// | Authors: Alexander Zhukov <alex@veresk.ru> Original port from Python |
+// | Authors: Harry Fuecks <hfuecks@phppatterns.com> Port to PEAR + more |
+// | Authors: Many @ Sitepointforums Advanced PHP Forums |
+// +----------------------------------------------------------------------+
+//
+// $Id: States.php,v 1.2 2005/12/22 11:09:09 weizhuo Exp $
+//
+/**
+* Parsing states.
+* @package System.Security.SafeHtml
+* @version $Id: States.php,v 1.2 2005/12/22 11:09:09 weizhuo Exp $
+*/
+/**
+* Define parser states
+*/
+/*define('TSAX3_STATE_STOP', 0);
+define('TSAX3_STATE_START', 1);
+define('TSAX3_STATE_TAG', 2);
+define('TSAX3_STATE_OPENING_TAG', 3);
+define('TSAX3_STATE_CLOSING_TAG', 4);
+define('TSAX3_STATE_ESCAPE', 6);
+define('TSAX3_STATE_JASP', 7);
+define('TSAX3_STATE_PI', 8);
+*/
+/**
+* Initial state: looks for the next '<' and reports whatever text
+* precedes it to the data handler
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_StartingState {
+    /**
+    * Scans up to the next '<', forwards non-empty text to the data
+    * handler and then hands over to the tag state
+    * @param TSax3_StateParser subclass
+    * @return constant TSAX3_STATE_TAG
+    * @access protected
+    */
+    public function parse(&$context) {
+        $text = $context->scanUntilString('<');
+        if ($text != '') {
+            $handler = $context->handler_object_data;
+            $method = $context->handler_method_data;
+            $handler->$method($context->htmlsax, $text);
+        }
+        // One character is skipped here - presumably the '<' itself
+        $context->ignoreCharacter();
+        return TSax3_StateParser::TSAX3_STATE_TAG;
+    }
+}
+/**
+* Picks the state that follows StartingState, based on the next character
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_TagState {
+    /**
+    * Reads one character and maps it to the next state: '/' closing tag,
+    * '?' processing instruction, '%' JASP, '!' escape. Any other character
+    * is pushed back and the opening tag state is selected.
+    * @param TSax3_StateParser subclass
+    * @return constant the next state to move into
+    * @access protected
+    */
+    public function parse(&$context) {
+        switch ($context->scanCharacter()) {
+            case '/':
+                return TSax3_StateParser::TSAX3_STATE_CLOSING_TAG;
+            case '?':
+                return TSax3_StateParser::TSAX3_STATE_PI;
+            case '%':
+                return TSax3_StateParser::TSAX3_STATE_JASP;
+            case '!':
+                return TSax3_StateParser::TSAX3_STATE_ESCAPE;
+        }
+        // Not a marker character: give it back to the scanner
+        $context->unscanCharacter();
+        return TSax3_StateParser::TSAX3_STATE_OPENING_TAG;
+    }
+}
+/**
+* Handles closing XML tags
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_ClosingTagState {
+    /**
+    * Scans the tag name up to '/' or '>' and, when a name was found,
+    * reports it to the closing element handler with the empty flag FALSE
+    * @param TSax3_StateParser subclass
+    * @return constant TSAX3_STATE_START
+    * @access protected
+    */
+    public function parse(&$context) {
+        $name = $context->scanUntilCharacters('/>');
+        if ($name == '') {
+            return TSax3_StateParser::TSAX3_STATE_START;
+        }
+
+        if ($context->scanCharacter() == '/') {
+            // A '/' was read: read one more character and push it back
+            // unless it is the '>'
+            if ($context->scanCharacter() != '>') {
+                $context->unscanCharacter();
+            }
+        }
+
+        $handler = $context->handler_object_element;
+        $method = $context->handler_method_closing;
+        $handler->$method($context->htmlsax, $name, FALSE);
+
+        return TSax3_StateParser::TSAX3_STATE_START;
+    }
+}
+/**
+* Dealing with opening XML tags
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_OpeningTagState {
+    /**
+    * Scans the attributes of the tag being parsed. A value may be double
+    * quoted, single quoted or unquoted (ends at '>' or whitespace); an
+    * attribute without '=' gets the value NULL. When a name occurs more
+    * than once the last value wins.
+    * @param TSax3_StateParser subclass
+    * @return array attribute name => attribute value (or NULL)
+    * @access protected
+    * @see TSax3_AttributeStartState
+    */
+    function parseAttributes(&$context) {
+        $Attributes = array();
+
+        $context->ignoreWhitespace();
+        $attributename = $context->scanUntilCharacters("=/> \n\r\t");
+        while ($attributename != '') {
+            $attributevalue = NULL;
+            $context->ignoreWhitespace();
+            $char = $context->scanCharacter();
+            if ($char == '=') {
+                $context->ignoreWhitespace();
+                $char = $context->ScanCharacter();
+                if ($char == '"') {
+                    $attributevalue= $context->scanUntilString('"');
+                    $context->IgnoreCharacter();
+                } else if ($char == "'") {
+                    $attributevalue = $context->scanUntilString("'");
+                    $context->IgnoreCharacter();
+                } else {
+                    // Unquoted value: the character just read belongs to it
+                    $context->unscanCharacter();
+                    $attributevalue =
+                        $context->scanUntilCharacters("> \n\r\t");
+                }
+            } else if ($char !== NULL) {
+                // No '=' follows the name: push the character back and
+                // leave the value at NULL
+                $context->unscanCharacter();
+            }
+            $Attributes[$attributename] = $attributevalue;
+
+            $context->ignoreWhitespace();
+            $attributename = $context->scanUntilCharacters("=/> \n\r\t");
+        }
+        return $Attributes;
+    }
+
+    /**
+    * Scans the tag name and its attributes and reports the tag to the
+    * opening element handler. For a tag closed with '/' the opening and
+    * then the closing handler are called, both with the empty flag TRUE;
+    * otherwise only the opening handler is called with the flag FALSE.
+    * @param TSax3_StateParser subclass
+    * @return constant TSAX3_STATE_START
+    * @access protected
+    */
+    function parse(&$context) {
+        $tag = $context->scanUntilCharacters("/> \n\r\t");
+        if ($tag != '') {
+            $Attributes = $this->parseAttributes($context);
+            $char = $context->scanCharacter();
+            if ($char == '/') {
+                $char = $context->scanCharacter();
+                if ($char != '>') {
+                    $context->unscanCharacter();
+                }
+                $context->handler_object_element->
+                    {$context->handler_method_opening}($context->htmlsax, $tag,
+                    $Attributes, TRUE);
+                $context->handler_object_element->
+                    {$context->handler_method_closing}($context->htmlsax, $tag,
+                    TRUE);
+            } else {
+                $context->handler_object_element->
+                    {$context->handler_method_opening}($context->htmlsax, $tag,
+                    $Attributes, FALSE);
+            }
+        }
+        return TSax3_StateParser::TSAX3_STATE_START;
+    }
+}
+
+/**
+* Handles XML escapes ("<!" constructs), treating comments and CDATA
+* sections separately from other escapes
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_EscapeState {
+    /**
+    * Collects the text of the escape and, when it is not empty, reports
+    * it to the escape handler. Text starting with "--" is scanned up to
+    * "-->", text starting with "[" up to "]>", anything else up to ">".
+    * @param TSax3_StateParser subclass
+    * @return constant TSAX3_STATE_START
+    * @access protected
+    */
+    public function parse(&$context) {
+        $first = $context->scanCharacter();
+        if ($first == '-') {
+            if ($context->scanCharacter() == '-') {
+                // Comment: push both dashes back so they lead the text,
+                // then append the two characters following the scan
+                $context->unscanCharacter();
+                $context->unscanCharacter();
+                $escape = $context->scanUntilString('-->');
+                $escape .= $context->scanCharacter();
+                $escape .= $context->scanCharacter();
+            } else {
+                // Single dash: only the second character is pushed back
+                $context->unscanCharacter();
+                $escape = $context->scanUntilString('>');
+            }
+        } else {
+            // The character read belongs to the text in both cases
+            $context->unscanCharacter();
+            if ($first == '[') {
+                $escape = $context->scanUntilString(']>');
+                $escape .= $context->scanCharacter();
+            } else {
+                $escape = $context->scanUntilString('>');
+            }
+        }
+
+        // One character is skipped here - presumably the closing '>'
+        $context->ignoreCharacter();
+        if ($escape != '') {
+            $handler = $context->handler_object_escape;
+            $method = $context->handler_method_escape;
+            $handler->$method($context->htmlsax, $escape);
+        }
+        return TSax3_StateParser::TSAX3_STATE_START;
+    }
+}
+/**
+* Handles JASP/ASP markup
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_JaspState {
+    /**
+    * Scans up to "%>" and reports non-empty text to the JASP handler
+    * @param TSax3_StateParser subclass
+    * @return constant TSAX3_STATE_START
+    * @access protected
+    */
+    public function parse(&$context) {
+        $markup = $context->scanUntilString('%>');
+        if ($markup != '') {
+            $handler = $context->handler_object_jasp;
+            $method = $context->handler_method_jasp;
+            $handler->$method($context->htmlsax, $markup);
+        }
+        // Two characters are skipped here - presumably the "%>" terminator
+        $context->ignoreCharacter();
+        $context->ignoreCharacter();
+        return TSax3_StateParser::TSAX3_STATE_START;
+    }
+}
+/**
+* Handles XML processing instructions
+* @package System.Security.SafeHtml
+* @access protected
+*/
+class TSax3_PiState {
+    /**
+    * Scans the target (up to the first whitespace character) and then the
+    * data (up to "?>"); the PI handler is called with both only when the
+    * data is not empty
+    * @param TSax3_StateParser subclass
+    * @return constant TSAX3_STATE_START
+    * @access protected
+    */
+    public function parse(&$context) {
+        $pi_target = $context->scanUntilCharacters(" \n\r\t");
+        $pi_data = $context->scanUntilString('?>');
+        if ($pi_data != '') {
+            $handler = $context->handler_object_pi;
+            $method = $context->handler_method_pi;
+            $handler->$method($context->htmlsax, $pi_target, $pi_data);
+        }
+        // Two characters are skipped here - presumably the "?>" terminator
+        $context->ignoreCharacter();
+        $context->ignoreCharacter();
+        return TSax3_StateParser::TSAX3_STATE_START;
+    }
+}
+?> \ No newline at end of file