* @copyright 2000-2007 Joshua Eichorn
* @license http://www.opensource.org/licenses/lgpl-license.php LGPL
* @version CVS: $Id: WordParser.inc 246145 2007-11-14 01:37:03Z ashnazg $
* @link http://www.phpdoc.org
* @link http://pear.php.net/PhpDocumentor
* @since 0.1
* @todo CS cleanup - change package to PhpDocumentor
*/

/**
 * Retrieves tokens from source code for use by the Parser
 *
 * The parser hands this class the raw source via {@link setup()} and a list
 * of separator strings via {@link setSeperator()}; every call to
 * {@link getWord()} then returns either the text up to the next separator or
 * the separator itself, advancing an internal cursor.
 *
 * @category   ToolsAndUtilities
 * @package    phpDocumentor
 * @subpackage WordParsers
 * @author     Joshua Eichorn
 * @copyright  2000-2007 Joshua Eichorn
 * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @version    Release: 1.4.3
 * @link       http://www.phpdoc.org
 * @link       http://pear.php.net/PhpDocumentor
 * @see        Parser
 * @todo       CS cleanup - change package to PhpDocumentor
 */
class WordParser
{
    /*
    New lines around the world
    Macintosh: \r
    Unix     : \n
    Windows  : \r\n
    */

    /**#@+
     * @access private
     */
    /**
     * List of text that separates tokens, used to retrieve tokens
     * @var array
     */
    var $wordseperators = array();
    /**
     * Position within input of the cursor pointing to the next text to be
     * retrieved as a token
     * @var integer
     */
    var $pos = 0;
    /**
     * Size of the input source code
     * @var integer
     */
    var $size;
    /**
     * Source code
     * @var string
     */
    var $data;
    /**
     * Separator => offset of its next occurrence at or after the cursor.
     *
     * An offset equal to {@link $size} means "does not occur again".
     * Entries are only trustworthy while the cursor moves forward, so the
     * cache is emptied by {@link setup()} and whenever the cursor is moved
     * backwards.
     * @var array
     */
    var $cache = array();
    /**
     * Current line number
     * @var integer
     */
    var $linenum = 0;
    /**
     * Position the cursor was at the last time line numbers were counted, used
     * to guarantee that line numbers are incremented
     * @var integer
     */
    var $linenumpos = 0;
    /**
     * Used for {@}source} tag, contains currently parsed function source
     * @var string
     */
    var $source = '';
    /**
     * flag, determines whether tokens are added to {@link $source}
     * @var boolean
     */
    var $getsource = false;

    /**
     * If true, then white space is returned as a part of tokens, otherwise
     * tokens are trimmed
     * @var boolean
     */
    var $returnWhiteSpace = false;
    /**#@-*/

    /**
     * Initialize the WordParser
     *
     * Stores a reference to the input, records its length and resets the
     * cursor, the line counters and the separator cache.
     *
     * @param string &$input source code
     *
     * @return void
     */
    function setup(&$input)
    {
        $this->size       = strlen($input);
        $this->data       = & $input;
        $this->pos        = 0;
        $this->linenum    = 0;
        $this->linenumpos = 0;
        $this->cache      = array();
    }

    /**
     * Retrieve source code for the last function/method
     *
     * Returns everything collected since {@link retrievesource()} was called,
     * then empties the buffer and switches collection off.
     *
     * @return string
     */
    function getSource()
    {
        $source          = $this->source;
        $this->source    = '';
        $this->getsource = false;
        return $source;
    }

    /**
     * Used to tell the WordParser to start retrieving source code
     *
     * @param string $word initial content of the source buffer
     *
     * @return void
     * @access private
     */
    function retrievesource($word = '')
    {
        $this->source    = $word;
        $this->getsource = true;
    }

    /**
     * Retrieve a token from the token list
     *
     * The {@link Parser} class relies upon this method to retrieve the next
     * token.  The {@link $wordseperators} array is a collection of strings
     * that delineate tokens for the current parser state.  $wordseperators
     * is set by the parser with a call to {@link Parser::configWordParser()}
     * every time a new parser state is reached.
     *
     * For example, while parsing the source code for a class, the word
     * var is a token, and global is not,
     * but inside a function, the reverse is true.  The parser state
     * {@link PARSER_STATE_CLASS} has a token list that includes whitespace,
     * code delimiters like ; and {}, and comment/DocBlock indicators
     *
     * If the whitespace option has been turned off using
     * {@link setWhitespace()}, then no whitespace is returned with tokens
     * (a lone newline is still returned, because constructs such as //
     * comments rely on it to find their end).
     *
     * A token that is exactly "\r" or "\r\n" is returned as "\n".
     *
     * {@internal
     * For every separator the offset of its next occurrence ($tpos) is looked
     * up, using {@link $cache} to avoid rescanning the input.  The smallest
     * offset becomes $npos, the end of the token, so $npos - $pos is the
     * token length.  When that length is zero a separator sits directly at
     * the cursor and the separator itself is returned.  If several separators
     * start at the same offset the one listed first wins.
     *
     * Whitespace-only tokens are skipped with a loop rather than recursion so
     * that long runs of whitespace cannot exhaust the call stack.
     *
     * {@link $data} is of course the string containing the PHP code to be
     * parsed, and {@link $pos} is the cursor, or current location within the
     * parsed data.
     * }}
     *
     * @return string|false the next token, an empty string if
     *                      $wordseperators is not an array,
     *                      or false if the end of input has been reached
     */
    function getWord()
    {
        do {
            // ">=" rather than "==": setPos() may have moved the cursor past
            // the end, and strpos() rejects an offset beyond the string.
            if ($this->pos >= $this->size) {
                return false;
            }
            if (!is_array($this->wordseperators)) {
                // no token separators, tell the parser to choose a new state
                return '';
            }

            // assume, for starting, that the token runs to the end of input
            $npos   = $this->size;
            $seplen = 0;
            foreach ($this->wordseperators as $sep) {
                // An empty separator would match at the cursor and yield a
                // zero-length token, i.e. the cursor would never advance.
                if ($sep === '') {
                    continue;
                }
                $tpos = isset($this->cache[$sep]) ? $this->cache[$sep] : false;
                if (!is_int($tpos) || $tpos < $this->pos) {
                    // cache miss, or the cached hit is already behind us
                    $tpos = strpos($this->data, $sep, $this->pos);
                    if ($tpos === false) {
                        // remember "not found" as end-of-input so the rest of
                        // the source is not rescanned on every call
                        $tpos = $this->size;
                    }
                    $this->cache[$sep] = $tpos;
                }
                // strictly closer than the best candidate so far?
                if ($tpos < $npos) {
                    $npos   = $tpos;
                    $seplen = strlen($sep);
                }
            }

            $len = $npos - $this->pos;
            if ($len == 0) {
                // a separator starts right at the cursor: return it
                $len = $seplen;
            }
            $word = substr($this->data, $this->pos, $len);

            // Change other OS newlines to the unix one
            if ($word === "\r" || $word === "\r\n") {
                $word = "\n";
            }

            // Only count lines for text that has not been counted before
            // (the cursor may have been backed up over it).
            if ($this->linenumpos <= $this->pos) {
                $this->linenumpos = $this->pos + $len;
                $this->linenum   += substr_count($word, "\n");
            }
            if ($this->getsource) {
                $this->source .= $word;
            }
            $this->pos += $len;

            // Things like // comments rely on the newline to find their end,
            // so "\n" is always returned; other whitespace-only tokens are
            // skipped unless whitespace was requested.
        } while (!$this->returnWhiteSpace
            && $word !== "\n"
            && trim($word) === ''
        );

        return $word;
    }

    /**
     * Returns the current pointer position, or 1 character after the end of
     * the word
     *
     * @return int the position
     */
    function getPos()
    {
        return $this->pos;
    }

    /**
     * Unused
     *
     * {@source}
     *
     * @param integer $start starting position
     * @param integer $len   length of block to retrieve
     *
     * @return string the requested block of characters
     */
    function getBlock($start, $len)
    {
        return substr($this->data, $start, $len);
    }

    /**
     * Sets the list of possible separator tokens
     *
     * The array is stored by reference.
     *
     * @param array &$seps array of strings that separate tokens
     *
     * @return void
     * @uses $wordseperators
     */
    function setSeperator(&$seps)
    {
        $this->wordseperators = &$seps;
    }

    /**
     * Set the internal cursor within the source code
     *
     * Moving the cursor backwards empties the separator cache: cached offsets
     * describe the next occurrence after the old cursor and could hide an
     * earlier occurrence that is now ahead of the cursor again.
     *
     * @param integer $pos the position
     *
     * @return void
     */
    function setPos($pos)
    {
        if ($pos < $this->pos) {
            $this->cache = array();
        }
        $this->pos = $pos;
    }

    /**
     * Backup to the previous token so that it can be retrieved again in a new
     * context.
     *
     * Occasionally, a word will be passed to an event handler that should be
     * handled by another event handler.  This method allows that to happen.
     *
     * When source is being collected, the token is also removed from the end
     * of {@link $source} so that it is not recorded twice when it is
     * retrieved again.
     *
     * Note: a "\r\n" newline is handed out as "\n", so backing up over it
     * rewinds the cursor by one byte only.
     *
     * @param string $word token to back up to
     *
     * @return void
     */
    function backupPos($word)
    {
        $len = strlen($word);
        if ($this->getsource) {
            // drop the whole token, not just its last character
            $keep         = max(0, strlen($this->source) - $len);
            $this->source = (string) substr($this->source, 0, $keep);
        }
        $this->pos   = max(0, $this->pos - $len);
        $this->cache = array();
    }

    /**
     * set parser to return or strip whitespace
     *
     * @param boolean $val flag to return or strip whitespace
     *
     * @return void
     */
    function setWhitespace($val = false)
    {
        $this->returnWhiteSpace = $val;
    }
}
?>