<?php
//
// +------------------------------------------------------------------------+
// | phpDocumentor                                                          |
// +------------------------------------------------------------------------+
// | Copyright (c) 2000-2003 Joshua Eichorn, Gregory Beaver                 |
// | Email         jeichorn@phpdoc.org, cellog@phpdoc.org                   |
// | Web           http://www.phpdoc.org                                    |
// | Mirror        http://phpdocu.sourceforge.net/                          |
// | PEAR          http://pear.php.net/package-info.php?pacid=137           |
// +------------------------------------------------------------------------+
// | This source file is subject to version 3.00 of the PHP License,        |
// | that is available at http://www.php.net/license/3_0.txt.               |
// | If you did not receive a copy of the PHP license and are unable to     |
// | obtain it through the world-wide-web, please send a note to            |
// | license@php.net so we can mail you a copy immediately.                 |
// +------------------------------------------------------------------------+
//
/**
 * @author    Joshua Eichorn <jeichorn@phpdoc.org>
 * @version    $Id: WordParser.inc,v 1.1 2005/10/17 18:36:57 jeichorn Exp $
 * @package     phpDocumentor
 * @subpackage WordParsers
 */
/**
 * Retrieves tokens from source code for use by the Parser
 * @see Parser
 * @author    Joshua Eichorn <jeichorn@phpdoc.org>
 * @version    $Id: WordParser.inc,v 1.1 2005/10/17 18:36:57 jeichorn Exp $
 * @package     phpDocumentor
 * @subpackage WordParsers
 */
class WordParser
{
    /*
    New lines around the world
    Macintosh: \r
    Unix     : \n
    Windows  : \r\n
    */

    /**#@+
     * @access private
     */
    /**
     * List of text that separates tokens, used to retrieve tokens
     * @var array
     */
    var $wordseperators = array();

    /**
     * Position within input of the cursor pointing to the next text to be
     * retrieved as a token
     * @var integer
     */
    var $pos = 0;

    /**
     * Size of the input source code
     * @var integer
     */
    var $size;

    /**
     * Source code
     * @var string
     */
    var $data;

    /**
     * Separator position cache used by {@link getWord()}
     *
     * Maps a separator string to the position at which it was last found.
     * An entry is only trusted while it is not behind the cursor, so the
     * cache is emptied whenever the cursor is moved backwards.
     * @var array
     */
    var $cache = array();

    /**
     * Current line number
     * @var integer
     */
    var $linenum = 0;

    /**
     * Position the cursor was at the last time line numbers were counted, used
     * to guarantee that line numbers are incremented
     * @var integer
     */
    var $linenumpos = 0;

    /**
     * Used for {@}source} tag, contains currently parsed function source
     * @var string
     */
    var $source = '';

    /**
     * flag, determines whether tokens are added to {@link $source}
     * @var boolean
     */
    var $getsource = false;

    /**
     * If true, then white space is returned as a part of tokens, otherwise
     * tokens are trimmed
     * @var boolean
     */
    var $returnWhiteSpace = false;
    /**#@-*/

    /**
     * Initialize the WordParser
     *
     * Resets the cursor, the line counters and the separator cache.  The
     * input is stored by reference, not copied.
     * @param string source code
     */
    function setup(&$input)
    {
        $this->size = strlen($input);
        $this->data = & $input;
        $this->pos = 0;
        $this->linenum = 0;
        $this->linenumpos = 0;
        $this->cache = array();
    }

    /**
     * Retrieve source code for the last function/method
     *
     * The collected source is cleared and source collection is switched off.
     * @return string
     */
    function getSource()
    {
        $source = $this->source;
        $this->source = '';
        $this->getsource = false;
        return $source;
    }

    /**
     * Used to tell the WordParser to start retrieving source code
     * @param string text the collected source should start with
     * @access private
     */
    function retrievesource($word = '')
    {
        $this->source = $word;
        $this->getsource = true;
    }

    /**
     * Retrieve a token from the token list
     *
     * The {@link Parser} class relies upon this method to retrieve the next
     * token.  The {@link $wordseperators} array is a collection of strings
     * that delineate tokens for the current parser state.  $wordseperators
     * is set by the parser with a call to {@link Parser::configWordParser()}
     * every time a new parser state is reached.
     *
     * For example, while parsing the source code for a class, the word
     * <code>var</code> is a token, and <code>global</code> is not,
     * but inside a function, the reverse is true.  The parser state
     * {@link PARSER_STATE_CLASS} has a token list that includes whitespace,
     * code delimiters like ; and {}, and comment/DocBlock indicators
     *
     * If the whitespace option has been turned off using
     * {@link setWhitespace()}, then no whitespace is returned with tokens
     * (a lone newline is still returned, because things like // comments
     * rely on the newline to find their end).
     *
     * {@internal
     * In the first segment of the function, the code attempts to find the next
     * token.  A cache is used to speed repetitious tasks.  The $tpos variable
     * is used to hold the position of the next token.  $npos is used to
     * hold the end of the token, and so $npos - $this->pos will give the
     * length of the token.  This is used to allow tokens that contain
     * whitespace, should that option be desired.
     *
     * {@link $data} is of course the string containing the PHP code to be
     * parsed, and {@link $pos} is the cursor, or current location within the
     * parsed data.
     *
     * Whitespace tokens are skipped with a loop rather than by recursion so
     * that a long run of whitespace cannot exhaust the call stack.
     * }}
     * @return string|false the next token, an empty string if
     *                      $wordseperators is not an array,
     *                      or false if the end of input has been reached
     */
    function getWord()
    {
        do {
            // >= rather than ==: a cursor pushed past the end must not be
            // handed to strpos() as an out-of-range offset
            if ($this->pos >= $this->size) {
                return false;
            }
            if (!is_array($this->wordseperators)) {
                // no token separators, tell the parser to choose a new state
                return "";
            }

            // assume, for starting, that the token runs to the end of input
            $npos = $this->size;
            $seplen = 0;
            foreach ($this->wordseperators as $sep) {
                if ($sep === '') {
                    // an empty separator can never delimit a token and would
                    // stop the cursor from advancing
                    continue;
                }
                // cache is set if this separator has been tested
                $tpos = isset($this->cache[$sep]) ? $this->cache[$sep] : false;
                if (!is_int($tpos) || $tpos < $this->pos) {
                    // find the position of the next token separator
                    $tpos = strpos($this->data, $sep, $this->pos);
                }
                if ($tpos === false) {
                    continue;
                }
                if ($tpos < $npos) {
                    // closest separator so far: the token runs from
                    // $this->pos up to it (strict <, so on a tie the
                    // separator listed first wins)
                    $npos = $tpos;
                    $seplen = strlen($sep);
                } else {
                    $this->cache[$sep] = $tpos;
                }
            }

            $len = $npos - $this->pos;
            if ($len == 0) {
                // the cursor sits on a separator: the separator is the token
                $len = $seplen;
            }
            $word = substr($this->data, $this->pos, $len);

            // Change random other os newlines to the unix one
            if ($word == "\r" || $word == "\r\n") {
                $word = "\n";
            }

            // only count lines for text that has not been counted before,
            // so tokens re-read after a backup are not counted twice
            if ($this->linenumpos <= $this->pos) {
                $this->linenumpos = $this->pos + $len;
                $this->linenum += substr_count($word, "\n");
            }
            if ($this->getsource) {
                $this->source .= $word;
            }
            $this->pos = $this->pos + $len;

            // never return worthless white space (\t, ' ') unless asked to;
            // a lone newline is always returned
            $skip = !$this->returnWhiteSpace
                && $word !== "\n"
                && strlen(trim($word)) == 0;
        } while ($skip);

        return $word;
    }

    /**
     * Returns the current pointer position, or 1 character after the end of the word
     * @return integer
     */
    function getPos()
    {
        return $this->pos;
    }

    /**
     * Unused
     *
     * {@source}
     * @param integer starting position
     * @param integer length of block to retrieve
     * @return string
     */
    function getBlock($start, $len)
    {
        return substr($this->data, $start, $len);
    }

    /**
     * The separator array is stored by reference, not copied.
     * @uses $wordseperators
     * @param array array of strings that separate tokens
     */
    function setSeperator(&$seps)
    {
        $this->wordseperators = &$seps;
    }

    /**
     * Set the internal cursor within the source code
     *
     * Moving the cursor backwards empties the separator cache, and a
     * negative position is treated as 0.
     * @param integer
     */
    function setPos($pos)
    {
        $this->_seek($pos);
    }

    /**
     * Backup to the previous token so that it can be retrieved again in a new
     * context.
     *
     * Occasionally, a word will be passed to an event handler that should be
     * handled by another event handler.  This method allows that to happen.
     *
     * While source is being collected the token is also removed from the end
     * of {@link $source}, so that it is not present twice once it has been
     * retrieved again.
     * @param string token to back up to
     */
    function backupPos($word)
    {
        $len = strlen((string) $word);
        if ($this->getsource) {
            // drop the whole token, not just its last character
            $keep = strlen($this->source) - $len;
            $this->source = ($keep > 0) ? substr($this->source, 0, $keep) : '';
        }
        $this->_seek($this->pos - $len);
    }

    /**
     * set parser to return or strip whitespace
     * @param boolean
     */
    function setWhitespace($val = false)
    {
        $this->returnWhiteSpace = $val;
    }

    /**
     * Move the cursor, keeping the separator cache consistent
     *
     * {@link getWord()} trusts a cached separator position as long as it is
     * not behind the cursor.  That only holds while the cursor moves forward,
     * so the cache is emptied on any backward move.
     * @param integer new cursor position, negative values are treated as 0
     * @access private
     */
    function _seek($pos)
    {
        if ($pos < 0) {
            $pos = 0;
        }
        if ($pos < $this->pos) {
            $this->cache = array();
        }
        $this->pos = $pos;
    }
}