<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


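/**
 * A single token: the term text together with its start and end offsets in
 * the source text, its lexical type and its position increment.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */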
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * Term text carried by this token.
     *
     * @var string
     */
    private $_termText;

    /**
     * Offset in the source text at which the token begins.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * Offset in the source text at which the token ends (one greater than
     * the position of the token's last character).
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * Lexical type of the token ('word' unless the creator says otherwise).
     *
     * @var string
     */
    private $_type;

    /**
     * Position of this token relative to the previous one.
     *
     * Every new token starts with an increment of one.
     *
     * Typical reasons to change it:
     *
     *  - Zero places several terms at the same position, e.g. when a word has
     *    more than one stem, so that phrase searches containing either stem
     *    match.  Only the first stem keeps an increment of one; every further
     *    stem gets zero.  Repeating a token with a zero increment can also be
     *    used to boost the score of matches on that token.
     *
     *  - A value above one inhibits exact phrase matches.  A stop word filter
     *    may, for instance, drop stop words and give each following non-stop
     *    word an increment equal to the number of stop words removed in front
     *    of it; exact phrase queries then match only where no stop words
     *    intervened.
     *
     * @var integer
     */
    private $_positionIncrement;


    /**
     * Creates a token from its text, its offsets in the source text and,
     * optionally, its lexical type.  The position increment is initialised
     * to one.
     *
     * @param string  $text  term text
     * @param integer $start start offset in the source text
     * @param integer $end   end offset in the source text
     * @param string  $type  lexical type
     */
    public function __construct($text, $start, $end, $type = 'word')
    {
        // Tokens are one position after their predecessor until told otherwise.
        $this->_positionIncrement = 1;

        $this->_type        = $type;
        $this->_termText    = $text;
        $this->_endOffset   = $end;
        $this->_startOffset = $start;
    }


    /**
     * Gives back the term text of this token.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Gives back the start offset: the position in the source text of the
     * first character that belongs to this token.
     *
     * Note:
     * getEndOffset() - getStartOffset() is not guaranteed to equal
     * strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), because a
     * stemmer or another filter may have changed the term text.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * Gives back the end offset: one greater than the position in the source
     * text of the last character that belongs to this token.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }

    /**
     * Gives back the lexical type of this token ('word' by default).
     *
     * @return string
     */
    public function getType()
    {
        return $this->_type;
    }

    /**
     * Gives back the position increment of this token.
     *
     * @return integer
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Replaces the position increment of this token.
     *
     * @param integer $positionIncrement
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;
    }
}