<?php
/**
 * a generic lexer
 * 
 * phpDocumentor :: automatic documentation generator
 * 
 * PHP versions 4 and 5
 *
 * Copyright (c) 2000-2007 Joshua Eichorn
 * 
 * LICENSE:
 * 
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General
 * Public License as published by the Free Software Foundation;
 * either version 2.1 of the License, or (at your option) any
 * later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * @category   ToolsAndUtilities
 * @package    phpDocumentor
 * @subpackage WordParsers
 * @author     Joshua Eichorn <jeichorn@phpdoc.org>
 * @copyright  2000-2007 Joshua Eichorn
 * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @version    CVS: $Id: WordParser.inc 246145 2007-11-14 01:37:03Z ashnazg $
 * @link       http://www.phpdoc.org
 * @link       http://pear.php.net/PhpDocumentor
 * @since      0.1
 * @todo       CS cleanup - change package to PhpDocumentor
 */

/**
 * Retrieves tokens from source code for use by the Parser
 *
 * @category   ToolsAndUtilities
 * @package    phpDocumentor
 * @subpackage WordParsers
 * @author     Joshua Eichorn <jeichorn@phpdoc.org>
 * @copyright  2000-2007 Joshua Eichorn
 * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @version    Release: 1.4.3
 * @link       http://www.phpdoc.org
 * @link       http://pear.php.net/PhpDocumentor
 * @see        Parser
 * @todo       CS cleanup - change package to PhpDocumentor
 */
class WordParser
{
    /*
    Newline conventions by platform
    Macintosh: \r 
        Unix : \n 
    Windows : \r\n 
     */
    
    /**#@+
     * @access private
     */
    /**
     * List of text that separates tokens, used to retrieve tokens
     * @var array
     */
    var $wordseperators = array();
    
    /**
     * Position within input of the cursor pointing to the next text to be
     * retrieved as a token
     * @var integer
     */
    var $pos = 0;

    /**
     * Size of the input source code
     * @var integer
     */
    var $size;

    /**
     * Source code
     * @var string
     */
    var $data;

    /**
     * Separator positions found by earlier searches, indexed by separator
     * string.  An entry is only trusted by {@link getWord()} while it is at
     * or after the cursor.
     * @var array
     */
    var $cache;
    /**
     * Current line number
     * @var integer
     */
    var $linenum = 0;
    /**
     * Position the cursor was at the last time line numbers were counted, used
     * to guarantee that line numbers are incremented only once for text that
     * is read again after the cursor has been moved backward
     * @var integer
     */
    var $linenumpos = 0;
    
    /**
     * Used for {@}source} tag, contains currently parsed function source
     * @var string
     */
    var $source = '';
    /**
     * flag, determines whether tokens are added to {@link $source}
     * @var boolean
     */
    var $getsource = false;

    /**
     * If true, then white space is returned as a part of tokens, otherwise
     * tokens are trimmed
     * @var boolean
     */
    var $returnWhiteSpace = false;
    /**#@-*/

    /**
     * Initialize the WordParser
     *
     * Stores a reference to the input, records its length, and resets the
     * cursor, the line counters and the separator position cache.
     *
     * @param string &$input source code
     * 
     * @return void
     */
    function setup(&$input)
    {
        $this->size       = strlen($input);
        $this->data       = & $input;
        $this->pos        = 0;
        $this->linenum    = 0;
        $this->linenumpos = 0;
        $this->cache      = array();
    }
    
    /**
     * Retrieve source code for the last function/method
     *
     * Returns everything accumulated in {@link $source}, then empties it and
     * switches source collection off.
     *
     * @return string
     */
    function getSource()
    {
        $source          = $this->source;
        $this->source    = '';
        $this->getsource = false;
        return $source;
    }
    
    /**
     * Used to tell the WordParser to start retrieving source code
     *
     * @param string $word initial contents of the collected source code
     *
     * @return void
     * @access private
     */
    function retrievesource($word = '')
    {
        $this->source    = $word;
        $this->getsource = true;
    }

    /**
     * Retrieve a token from the token list
     *
     * The {@link Parser} class relies upon this method to retrieve the next
     * token.  The {@link $wordseperators} array is a collection of strings
     * that delineate tokens for the current parser state.  $wordseperators
     * is set by the parser with a call to {@link Parser::configWordParser()}
     * every time a new parser state is reached.
     *
     * For example, while parsing the source code for a class, the word
     * <code>var</code> is a token, and <code>global</code> is not,
     * but inside a function, the reverse is true.  The parser state
     * {@link PARSER_STATE_CLASS} has a token list that includes whitespace,
     * code delimiters like ; and {}, and comment/DocBlock indicators
     *
     * If the whitespace option has been turned off using
     * {@link setWhitespace()}, then no whitespace is returned with tokens
     *
     * {@internal
     * In the first segment of the function, the code attempts to find the next
     * token.  A cache is used to speed repetitious tasks.  The $tpos variable
     * is used to hold the position of the next token.  $npos is used to
     * hold the end of the token, and so $npos - $this->pos will give the
     * length of the token.  When a separator sits exactly at the cursor the
     * separator itself is returned as the token.
     *
     * {@link $data} is of course the string containing the PHP code to be
     * parsed, and {@link $pos} is the cursor, or current location within the
     * parsed data.
     * }}
     *
     * @return string|false the next token, an empty string if
     *                      $wordseperators is not an array,
     *                      or false if the end of input has been reached
     */
    function getWord()
    {
        // ">=" rather than "==": the cursor may have been moved past the end
        // with setPos(), and searching from there is an error
        if ($this->pos >= $this->size) {
            return false;
        }

        if (!is_array($this->wordseperators)) {
            // no token separators, tell the parser to choose a new state
            return "";
        }

        // assume, for starting, that the token is from $this->pos to the end
        $npos   = $this->size;
        $seplen = 0;
        foreach ($this->wordseperators as $sep) {
            // an empty separator can never delimit a token; on PHP 8 it
            // would match at the cursor with length 0 and never advance
            if ($sep === '') {
                continue;
            }

            // cache is set if this separator has been tested
            $tpos = isset($this->cache[$sep]) ? $this->cache[$sep] : false;
            if (!is_int($tpos) || $tpos < $this->pos) {
                // no usable cached position: find the next occurrence
                $tpos = strpos($this->data, $sep, $this->pos);
            }

            if ($tpos === false) {
                // separator does not occur in the rest of the input
                continue;
            }

            if ($tpos < $npos) {
                // closest separator so far: the token runs from
                // $this->pos up to this separator
                $npos   = $tpos;
                $seplen = strlen($sep);
            } else {
                // further away than the current best, remember where it
                // is so that the next call can skip the search
                $this->cache[$sep] = $tpos;
            }
        }

        $len = $npos - $this->pos;
        if ($len == 0) {
            // a separator starts at the cursor: return the separator itself
            $len = $seplen;
        }

        $word = substr($this->data, $this->pos, $len);
        
        // Change other operating systems' newlines to the unix one
        if ($word === "\r" || $word === "\r\n") {
            $word = "\n";
        }
        
        // only count newlines in text that has not been counted before
        if ($this->linenumpos <= $this->pos) {
            $this->linenumpos = $this->pos + $len;
            $this->linenum   += substr_count($word, "\n");
        }

        if ($this->getsource) {
            $this->source .= $word;
        }
        $this->pos = $this->pos + $len;

        // Things like // comments rely on the newline 
        // to find their end, so a lone newline is always returned;
        // any other all-whitespace token is skipped.
        // The recursive call goes through $this so that a subclass
        // overriding getWord() is still consulted for the next token.
        if ($this->returnWhiteSpace == false) {
            if (trim($word) === '' && $word !== "\n") {
                $word = $this->getWord();
            }
        }
        return $word;
    }
    

    /**
     * Returns the current pointer position, or 1 character after the end of the word
     *
     * @return int the position
     */
    function getPos()
    {
        return $this->pos;
    }

    /**
     * Retrieve an arbitrary slice of the input without moving the cursor
     *
     * {@source}
     *
     * @param integer $start starting position
     * @param integer $len   length of block to retrieve
     *
     * @return string the requested block of characters
     */
    function getBlock($start, $len)
    {
        return substr($this->data, $start, $len);
    }

    /**
     * Sets the list of possible separator tokens
     *
     * The array is stored by reference.
     *
     * @param array &$seps array of strings that separate tokens
     *
     * @return void
     * @uses $wordseperators
     */
    function setSeperator(&$seps)
    {
        $this->wordseperators = &$seps;
    }

    /**
     * Set the internal cursor within the source code
     *
     * When the cursor moves backward the separator position cache is
     * discarded: cached positions only describe the input from the point
     * where they were searched, so a separator lying between the new and the
     * old cursor would otherwise be missed by {@link getWord()}.
     *
     * @param integer $pos the position
     *
     * @return void
     */
    function setPos($pos)
    {
        if ($pos < $this->pos) {
            $this->cache = array();
        }
        $this->pos = $pos;
    }
    
    /**
     * Backup to the previous token so that it can be retrieved again in a new
     * context.
     *
     * Occasionally, a word will be passed to an event handler that should be
     * handled by another event handler.  This method allows that to happen.
     *
     * The cursor is moved back by the length of $word (never before the start
     * of the input).  If source code is being collected, the same number of
     * characters is removed from the end of {@link $source}, so that the token
     * is not recorded twice when it is read again.
     *
     * @param string $word token to back up to
     *
     * @return void
     */
    function backupPos($word)
    {
        $wordlen = strlen($word);
        if ($this->getsource) {
            $keep = strlen($this->source) - $wordlen;
            // substr() with nothing to keep returns false on older PHP
            // versions, so produce the empty string explicitly
            $this->source = ($keep > 0) ? substr($this->source, 0, $keep) : '';
        }
        $this->pos = max(0, $this->pos - $wordlen);
    }

    /**
     * set parser to return or strip whitespace
     *
     * @param boolean $val flag to return or strip whitespace
     *
     * @return void
     */
    function setWhitespace($val = false)
    {
        $this->returnWhiteSpace = $val;
    }
}
?>