summaryrefslogtreecommitdiff
path: root/tests/test_tools/simpletest/parser.php
diff options
context:
space:
mode:
authorxue <>2006-07-16 01:50:23 +0000
committerxue <>2006-07-16 01:50:23 +0000
commitaf68030fcf0c266300feb2c100149ecadef7d364 (patch)
tree76b7c8ad5d8227870b9ef10c3e7b92a36336b320 /tests/test_tools/simpletest/parser.php
parent4b78404c20490a615459267426ce9e6737bf4485 (diff)
Merge from 3.0 branch till 1264.
Diffstat (limited to 'tests/test_tools/simpletest/parser.php')
-rw-r--r--tests/test_tools/simpletest/parser.php213
1 files changed, 118 insertions, 95 deletions
diff --git a/tests/test_tools/simpletest/parser.php b/tests/test_tools/simpletest/parser.php
index 7dc56a58..3f58e7e9 100644
--- a/tests/test_tools/simpletest/parser.php
+++ b/tests/test_tools/simpletest/parser.php
@@ -3,23 +3,33 @@
* base include file for SimpleTest
* @package SimpleTest
* @subpackage MockObjects
- * @version $Id: parser.php,v 1.66 2005/02/06 04:03:27 lastcraft Exp $
+ * @version $Id: parser.php,v 1.72 2006/01/04 03:15:05 lastcraft Exp $
*/
/**#@+
* Lexer mode stack constants
*/
- define("LEXER_ENTER", 1);
- define("LEXER_MATCHED", 2);
- define("LEXER_UNMATCHED", 3);
- define("LEXER_EXIT", 4);
- define("LEXER_SPECIAL", 5);
+ if (! defined('LEXER_ENTER')) {
+ define('LEXER_ENTER', 1);
+ }
+ if (! defined('LEXER_MATCHED')) {
+ define('LEXER_MATCHED', 2);
+ }
+ if (! defined('LEXER_UNMATCHED')) {
+ define('LEXER_UNMATCHED', 3);
+ }
+ if (! defined('LEXER_EXIT')) {
+ define('LEXER_EXIT', 4);
+ }
+ if (! defined('LEXER_SPECIAL')) {
+ define('LEXER_SPECIAL', 5);
+ }
/**#@-*/
/**
* Compounded regular expression. Any of
* the contained patterns could match and
- * when one does it's label is returned.
+ * when one does, it's label is returned.
* @package SimpleTest
* @subpackage WebTester
*/
@@ -441,57 +451,27 @@
}
/**
- * Converts HTML tokens into selected SAX events.
+ * Breas HTML into SAX events.
* @package SimpleTest
* @subpackage WebTester
*/
- class SimpleSaxParser {
- protected $_lexer;
- protected $_listener;
- protected $_tag;
- protected $_attributes;
- protected $_current_attribute;
-
- /**
- * Sets the listener.
- * @param SimpleSaxListener $listener SAX event handler.
- * @access public
- */
- function SimpleSaxParser($listener) {
- $this->_listener = $listener;
- $this->_lexer = $this->createLexer($this);
- $this->_tag = '';
- $this->_attributes = array();
- $this->_current_attribute = '';
- }
-
- /**
- * Runs the content through the lexer which
- * should call back to the acceptors.
- * @param string $raw Page text to parse.
- * @return boolean False if parse error.
- * @access public
- */
- function parse($raw) {
- return $this->_lexer->parse($raw);
- }
+ class SimpleHtmlLexer extends SimpleLexer {
/**
- * Sets up the matching lexer. Starts in 'text' mode.
- * @param SimpleSaxParser $parser Event generator, usually $self.
- * @return SimpleLexer Lexer suitable for this parser.
+ * Sets up the lexer with case insensitive matching
+ * and adds the HTML handlers.
+ * @param SimpleSaxParser $parser Handling strategy by
+ * reference.
* @access public
- * @static
*/
- function createLexer($parser) {
- $lexer = new SimpleLexer($parser, 'text');
- $lexer->mapHandler('text', 'acceptTextToken');
- SimpleSaxParser::_addSkipping($lexer);
- foreach (SimpleSaxParser::_getParsedTags() as $tag) {
- SimpleSaxParser::_addTag($lexer, $tag);
+ function SimpleHtmlLexer($parser) {
+ $this->SimpleLexer($parser, 'text');
+ $this->mapHandler('text', 'acceptTextToken');
+ $this->_addSkipping();
+ foreach ($this->_getParsedTags() as $tag) {
+ $this->_addTag($tag);
}
- SimpleSaxParser::_addInTagTokens($lexer);
- return $lexer;
+ $this->_addInTagTokens();
}
/**
@@ -501,72 +481,114 @@
*/
function _getParsedTags() {
return array('a', 'title', 'form', 'input', 'button', 'textarea', 'select',
- 'option', 'frameset', 'frame');
+ 'option', 'frameset', 'frame', 'label');
}
/**
* The lexer has to skip certain sections such
* as server code, client code and styles.
- * @param SimpleLexer $lexer Lexer to add patterns to.
* @access private
- * @static
*/
- function _addSkipping($lexer) {
- $lexer->mapHandler('css', 'ignore');
- $lexer->addEntryPattern('<style', 'text', 'css');
- $lexer->addExitPattern('</style>', 'css');
- $lexer->mapHandler('js', 'ignore');
- $lexer->addEntryPattern('<script', 'text', 'js');
- $lexer->addExitPattern('</script>', 'js');
- $lexer->mapHandler('comment', 'ignore');
- $lexer->addEntryPattern('<!--', 'text', 'comment');
- $lexer->addExitPattern('-->', 'comment');
+ function _addSkipping() {
+ $this->mapHandler('css', 'ignore');
+ $this->addEntryPattern('<style', 'text', 'css');
+ $this->addExitPattern('</style>', 'css');
+ $this->mapHandler('js', 'ignore');
+ $this->addEntryPattern('<script', 'text', 'js');
+ $this->addExitPattern('</script>', 'js');
+ $this->mapHandler('comment', 'ignore');
+ $this->addEntryPattern('<!--', 'text', 'comment');
+ $this->addExitPattern('-->', 'comment');
}
/**
* Pattern matches to start and end a tag.
- * @param SimpleLexer $lexer Lexer to add patterns to.
* @param string $tag Name of tag to scan for.
* @access private
- * @static
*/
- function _addTag($lexer, $tag) {
- $lexer->addSpecialPattern("</$tag>", 'text', 'acceptEndToken');
- $lexer->addEntryPattern("<$tag", 'text', 'tag');
+ function _addTag($tag) {
+ $this->addSpecialPattern("</$tag>", 'text', 'acceptEndToken');
+ $this->addEntryPattern("<$tag", 'text', 'tag');
}
/**
* Pattern matches to parse the inside of a tag
* including the attributes and their quoting.
- * @param SimpleLexer $lexer Lexer to add patterns to.
* @access private
- * @static
*/
- function _addInTagTokens($lexer) {
- $lexer->mapHandler('tag', 'acceptStartToken');
- $lexer->addSpecialPattern('\s+', 'tag', 'ignore');
- SimpleSaxParser::_addAttributeTokens($lexer);
- $lexer->addExitPattern('>', 'tag');
+ function _addInTagTokens() {
+ $this->mapHandler('tag', 'acceptStartToken');
+ $this->addSpecialPattern('\s+', 'tag', 'ignore');
+ $this->_addAttributeTokens();
+ $this->addExitPattern('/>', 'tag');
+ $this->addExitPattern('>', 'tag');
}
/**
* Matches attributes that are either single quoted,
* double quoted or unquoted.
- * @param SimpleLexer $lexer Lexer to add patterns to.
* @access private
+ */
+ function _addAttributeTokens() {
+ $this->mapHandler('dq_attribute', 'acceptAttributeToken');
+ $this->addEntryPattern('=\s*"', 'tag', 'dq_attribute');
+ $this->addPattern("\\\\\"", 'dq_attribute');
+ $this->addExitPattern('"', 'dq_attribute');
+ $this->mapHandler('sq_attribute', 'acceptAttributeToken');
+ $this->addEntryPattern("=\s*'", 'tag', 'sq_attribute');
+ $this->addPattern("\\\\'", 'sq_attribute');
+ $this->addExitPattern("'", 'sq_attribute');
+ $this->mapHandler('uq_attribute', 'acceptAttributeToken');
+ $this->addSpecialPattern('=\s*[^>\s]*', 'tag', 'uq_attribute');
+ }
+ }
+
+ /**
+ * Converts HTML tokens into selected SAX events.
+ * @package SimpleTest
+ * @subpackage WebTester
+ */
+ class SimpleHtmlSaxParser {
+ protected $_lexer;
+ protected $_listener;
+ protected $_tag;
+ protected $_attributes;
+ protected $_current_attribute;
+
+ /**
+ * Sets the listener.
+ * @param SimpleSaxListener $listener SAX event handler.
+ * @access public
+ */
+ function SimpleHtmlSaxParser($listener) {
+ $this->_listener = $listener;
+ $this->_lexer = $this->createLexer($this);
+ $this->_tag = '';
+ $this->_attributes = array();
+ $this->_current_attribute = '';
+ }
+
+ /**
+ * Runs the content through the lexer which
+ * should call back to the acceptors.
+ * @param string $raw Page text to parse.
+ * @return boolean False if parse error.
+ * @access public
+ */
+ function parse($raw) {
+ return $this->_lexer->parse($raw);
+ }
+
+ /**
+ * Sets up the matching lexer. Starts in 'text' mode.
+ * @param SimpleSaxParser $parser Event generator, usually $self.
+ * @return SimpleLexer Lexer suitable for this parser.
+ * @access public
* @static
*/
- function _addAttributeTokens($lexer) {
- $lexer->mapHandler('dq_attribute', 'acceptAttributeToken');
- $lexer->addEntryPattern('=\s*"', 'tag', 'dq_attribute');
- $lexer->addPattern("\\\\\"", 'dq_attribute');
- $lexer->addExitPattern('"', 'dq_attribute');
- $lexer->mapHandler('sq_attribute', 'acceptAttributeToken');
- $lexer->addEntryPattern("=\s*'", 'tag', 'sq_attribute');
- $lexer->addPattern("\\\\'", 'sq_attribute');
- $lexer->addExitPattern("'", 'sq_attribute');
- $lexer->mapHandler('uq_attribute', 'acceptAttributeToken');
- $lexer->addSpecialPattern('=\s*[^>\s]*', 'tag', 'uq_attribute');
+ function &createLexer($parser) {
+ $lexer = new SimpleHtmlLexer($parser);
+ return $lexer;
}
/**
@@ -589,13 +611,13 @@
$success = $this->_listener->startElement(
$this->_tag,
$this->_attributes);
- $this->_tag = "";
+ $this->_tag = '';
$this->_attributes = array();
return $success;
}
- if ($token != "=") {
- $this->_current_attribute = strtolower(SimpleSaxParser::decodeHtml($token));
- $this->_attributes[$this->_current_attribute] = "";
+ if ($token != '=') {
+ $this->_current_attribute = strtolower(SimpleHtmlSaxParser::decodeHtml($token));
+ $this->_attributes[$this->_current_attribute] = '';
}
return true;
}
@@ -625,11 +647,11 @@
function acceptAttributeToken($token, $event) {
if ($event == LEXER_UNMATCHED) {
$this->_attributes[$this->_current_attribute] .=
- SimpleSaxParser::decodeHtml($token);
+ SimpleHtmlSaxParser::decodeHtml($token);
}
if ($event == LEXER_SPECIAL) {
$this->_attributes[$this->_current_attribute] .=
- preg_replace('/^=\s*/' , '', SimpleSaxParser::decodeHtml($token));
+ preg_replace('/^=\s*/' , '', SimpleHtmlSaxParser::decodeHtml($token));
}
return true;
}
@@ -692,11 +714,12 @@
* @static
*/
function normalise($html) {
- $text = preg_replace('|<img.*?alt\s*=\s*"(.*?)".*?>|', ' \1 ', $html);
+ $text = preg_replace('|<!--.*?-->|', '', $html);
+ $text = preg_replace('|<img.*?alt\s*=\s*"(.*?)".*?>|', ' \1 ', $text);
$text = preg_replace('|<img.*?alt\s*=\s*\'(.*?)\'.*?>|', ' \1 ', $text);
$text = preg_replace('|<img.*?alt\s*=\s*([a-zA-Z_]+).*?>|', ' \1 ', $text);
$text = preg_replace('|<.*?>|', '', $text);
- $text = SimpleSaxParser::decodeHtml($text);
+ $text = SimpleHtmlSaxParser::decodeHtml($text);
$text = preg_replace('|\s+|', ' ', $text);
return trim($text);
}