summaryrefslogtreecommitdiff
path: root/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php
diff options
context:
space:
mode:
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php')
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php170
1 files changed, 170 insertions, 0 deletions
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php
new file mode 100644
index 00000000..a60d5d96
--- /dev/null
+++ b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php
@@ -0,0 +1,170 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to version 1.0 of the Zend Framework
+ * license, that is bundled with this package in the file LICENSE, and
+ * is available through the world-wide-web at the following URL:
+ * http://www.zend.com/license/framework/1_0.txt. If you did not receive
+ * a copy of the Zend Framework license and are unable to obtain it
+ * through the world-wide-web, please send a note to license@zend.com
+ * so we can mail you a copy immediately.
+ *
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
+ */
+
+
+/**
+ * A single token: term text, start/end offsets in the source text, lexical type and position increment.
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
+ */
+class Zend_Search_Lucene_Analysis_Token
+{
+ /**
+ * The text of the term, as passed to the constructor.
+ *
+ * @var string
+ */
+ private $_termText;
+
+ /**
+ * Start offset of this token in the source text.
+ *
+ * @var integer
+ */
+ private $_startOffset;
+
+ /**
+ * End offset of this token in the source text (see getEndOffset() for the convention).
+ *
+ * @var integer
+ */
+ private $_endOffset;
+
+ /**
+ * Lexical type; the constructor defaults it to 'word'.
+ *
+ * @var string
+ */
+ private $_type;
+
+ /**
+ * The position of this token relative to the previous Token.
+ *
+ * The constructor sets it to one; use setPositionIncrement() to change it.
+ *
+ * Some common uses for this are:
+ * Set it to zero to put multiple terms in the same position. This is
+ * useful if, e.g., a word has multiple stems. Searches for phrases
+ * including either stem will match. In this case, all but the first stem's
+ * increment should be set to zero: the increment of the first instance
+ * should be one. Repeating a token with an increment of zero can also be
+ * used to boost the scores of matches on that token.
+ *
+ * Set it to values greater than one to inhibit exact phrase matches.
+ * If, for example, one does not want phrases to match across removed stop
+ * words, then one could build a stop word filter that removes stop words and
+ * also sets the increment to the number of stop words removed before each
+ * non-stop word. Then exact phrase queries will only match when the terms
+ * occur with no intervening stop words.
+ *
+ * @var integer
+ */
+ private $_positionIncrement;
+
+
+ /**
+ * Creates a token from its text, offsets and type; the position increment starts at 1.
+ *
+ * @param string $text Term text.
+ * @param integer $start Start offset in the source text.
+ * @param integer $end End offset in the source text.
+ * @param string $type Lexical type; 'word' when omitted.
+ */
+ public function __construct($text, $start, $end, $type = 'word' )
+ {
+ $this->_termText = $text;
+ $this->_startOffset = $start;
+ $this->_endOffset = $end;
+ $this->_type = $type;
+
+ $this->_positionIncrement = 1;
+ }
+
+
+ /**
+ * Sets the position increment of this Token; the value is stored as given, without validation.
+ *
+ * @param integer $positionIncrement New increment relative to the previous Token.
+ */
+ public function setPositionIncrement($positionIncrement)
+ {
+ $this->_positionIncrement = $positionIncrement;
+ }
+
+ /**
+ * Returns the position increment of this Token (1 unless changed via setPositionIncrement()).
+ *
+ * @return integer
+ */
+ public function getPositionIncrement()
+ {
+ return $this->_positionIncrement;
+ }
+
+ /**
+ * Returns the Token's term text.
+ *
+ * @return string
+ */
+ public function getTermText()
+ {
+ return $this->_termText;
+ }
+
+ /**
+ * Returns this Token's starting offset, the position of the first character
+ * corresponding to this token in the source text.
+ *
+ * Note:
+ * The difference between getEndOffset() and getStartOffset() may not be equal
+ * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
+ * by a stemmer or some other filter.
+ *
+ * @return integer
+ */
+ public function getStartOffset()
+ {
+ return $this->_startOffset;
+ }
+
+ /**
+ * Returns this Token's ending offset, one greater than the position of the
+ * last character corresponding to this token in the source text.
+ *
+ * @return integer
+ */
+ public function getEndOffset()
+ {
+ return $this->_endOffset;
+ }
+
+ /**
+ * Returns this Token's lexical type. Defaults to 'word'.
+ *
+ * @return string
+ */
+ public function getType()
+ {
+ return $this->_type;
+ }
+}
+