diff options
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php')
-rw-r--r-- | buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php | 170 |
1 files changed, 170 insertions, 0 deletions
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


/**
 * A single token: the term text, the start and end offsets of that term in
 * the source text, a lexical type, and a position increment relative to the
 * previous token.
 *
 * The term text, offsets and type are fixed at construction time; only the
 * position increment can be changed afterwards (see setPositionIncrement()).
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * The text of the term.
     *
     * @var string
     */
    private $_termText;

    /**
     * Start offset of the token in the source text.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * End offset of the token in the source text.
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * Lexical type.
     *
     * @var string
     */
    private $_type;

    /**
     * The position of this token relative to the previous Token.
     *
     * The default value is one.
     *
     * Some common uses for this are:
     *
     * - Set it to zero to put multiple terms in the same position. This is
     *   useful if, e.g., a word has multiple stems. Searches for phrases
     *   including either stem will match. In this case, all but the first
     *   stem's increment should be set to zero: the increment of the first
     *   instance should be one. Repeating a token with an increment of zero
     *   can also be used to boost the scores of matches on that token.
     *
     * - Set it to values greater than one to inhibit exact phrase matches.
     *   If, for example, one does not want phrases to match across removed
     *   stop words, then one could build a stop word filter that removes stop
     *   words and also sets the increment to the number of stop words removed
     *   before each non-stop word. Then exact phrase queries will only match
     *   when the terms occur with no intervening stop words.
     *
     * @var integer
     */
    private $_positionIncrement;


    /**
     * Object constructor.
     *
     * Stores the given values as-is (no validation or conversion is done)
     * and initialises the position increment to 1.
     *
     * @param string  $text  The text of the term.
     * @param integer $start Start offset of the token in the source text.
     * @param integer $end   End offset of the token in the source text.
     * @param string  $type  Lexical type; defaults to 'word'.
     */
    public function __construct($text, $start, $end, $type = 'word')
    {
        $this->_termText    = $text;
        $this->_startOffset = $start;
        $this->_endOffset   = $end;
        $this->_type        = $type;

        // Every token starts one position after the previous one unless a
        // caller changes this through setPositionIncrement().
        $this->_positionIncrement = 1;
    }


    /**
     * positionIncrement setter.
     *
     * The value is stored as given; it is not validated here.
     *
     * @param integer $positionIncrement Position of this token relative to
     *                                   the previous one.
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;
    }

    /**
     * Returns the position increment of this Token.
     *
     * @return integer 1 unless changed through setPositionIncrement().
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Returns the Token's term text.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Returns this Token's starting offset, the position of the first character
     * corresponding to this token in the source text.
     *
     * Note:
     * The difference between getEndOffset() and getStartOffset() may not be
     * equal to strlen($token->getTermText()), as the term text may have been
     * altered by a stemmer or some other filter.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * Returns this Token's ending offset, one greater than the position of the
     * last character corresponding to this token in the source text.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }

    /**
     * Returns this Token's lexical type. Defaults to 'word'.
     *
     * @return string
     */
    public function getType()
    {
        return $this->_type;
    }
}