diff options
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Analysis')
4 files changed, 0 insertions, 364 deletions
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php deleted file mode 100644 index 8e234c16..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php +++ /dev/null @@ -1,94 +0,0 @@ -<?php -/** - * Zend Framework - * - * LICENSE - * - * This source file is subject to version 1.0 of the Zend Framework - * license, that is bundled with this package in the file LICENSE, and - * is available through the world-wide-web at the following URL: - * http://www.zend.com/license/framework/1_0.txt. If you did not receive - * a copy of the Zend Framework license and are unable to obtain it - * through the world-wide-web, please send a note to license@zend.com - * so we can mail you a copy immediately. - * - * @package Zend_Search_Lucene - * @subpackage Analysis - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ - - -/** Zend_Search_Lucene_Analysis_Token */ -require_once 'Zend/Search/Lucene/Analysis/Token.php'; - -/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */ -require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php'; - -/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */ -require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php'; - - - -/** - * An Analyzer is used to analyze text. - * It thus represents a policy for extracting index terms from text. - * - * Note: - * Lucene Java implementation is oriented to streams. It provides effective work - * with a huge documents (more then 20Mb). - * But engine itself is not oriented such documents. - * Thus Zend_Search_Lucene analysis API works with data strings and sets (arrays). - * - * @package Zend_Search_Lucene - * @subpackage Analysis - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ - -abstract class Zend_Search_Lucene_Analysis_Analyzer -{ - /** - * The Analyzer implementation used by default. - * - * @var Zend_Search_Lucene_Analysis_Analyzer - */ - static private $_defaultImpl; - - /** - * Tokenize text to a terms - * Returns array of Zend_Search_Lucene_Analysis_Token objects - * - * @param string $data - * @return array - */ - abstract public function tokenize($data); - - - /** - * Set the default Analyzer implementation used by indexing code. - * - * @param Zend_Search_Lucene_Analysis_Analyzer $similarity - */ - static public function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer) - { - self::$_defaultImpl = $analyzer; - } - - - /** - * Return the default Analyzer implementation used by indexing code. - * - * @return Zend_Search_Lucene_Analysis_Analyzer - */ - static public function getDefault() - { - if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) { - self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive(); - } - - return self::$_defaultImpl; - } - -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php deleted file mode 100644 index a60d5d96..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php +++ /dev/null @@ -1,170 +0,0 @@ -<?php -/** - * Zend Framework - * - * LICENSE - * - * This source file is subject to version 1.0 of the Zend Framework - * license, that is bundled with this package in the file LICENSE, and - * is available through the world-wide-web at the following URL: - * http://www.zend.com/license/framework/1_0.txt. If you did not receive - * a copy of the Zend Framework license and are unable to obtain it - * through the world-wide-web, please send a note to license@zend.com - * so we can mail you a copy immediately. - * - * @package Zend_Search_Lucene - * @subpackage document - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ - - -/** - * - * @package Zend_Search_Lucene - * @subpackage Analysis - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ -class Zend_Search_Lucene_Analysis_Token -{ - /** - * The text of the term. - * - * @var string - */ - private $_termText; - - /** - * Start in source text. - * - * @var integer - */ - private $_startOffset; - - /** - * End in source text - * - * @var integer - */ - private $_endOffset; - - /** - * Lexical type. - * - * @var string - */ - private $_type; - - /** - * The position of this token relative to the previous Token. - * - * The default value is one. - * - * Some common uses for this are: - * Set it to zero to put multiple terms in the same position. This is - * useful if, e.g., a word has multiple stems. Searches for phrases - * including either stem will match. In this case, all but the first stem's - * increment should be set to zero: the increment of the first instance - * should be one. Repeating a token with an increment of zero can also be - * used to boost the scores of matches on that token. - * - * Set it to values greater than one to inhibit exact phrase matches. - * If, for example, one does not want phrases to match across removed stop - * words, then one could build a stop word filter that removes stop words and - * also sets the increment to the number of stop words removed before each - * non-stop word. Then exact phrase queries will only match when the terms - * occur with no intervening stop words. - * - * @var integer - */ - private $_positionIncrement; - - - /** - * Object constructor - * - * @param string $text - * @param integer $start - * @param integer $end - * @param string $type - */ - public function __construct($text, $start, $end, $type = 'word' ) - { - $this->_termText = $text; - $this->_startOffset = $start; - $this->_endOffset = $end; - $this->_type = $type; - - $this->_positionIncrement = 1; - } - - - /** - * positionIncrement setter - * - * @param integer $positionIncrement - */ - public function setPositionIncrement($positionIncrement) - { - $this->_positionIncrement = $positionIncrement; - } - - /** - * Returns the position increment of this Token. - * - * @return integer - */ - public function getPositionIncrement() - { - return $this->_positionIncrement; - } - - /** - * Returns the Token's term text. - * - * @return string - */ - public function getTermText() - { - return $this->_termText; - } - - /** - * Returns this Token's starting offset, the position of the first character - * corresponding to this token in the source text. - * - * Note: - * The difference between getEndOffset() and getStartOffset() may not be equal - * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered - * by a stemmer or some other filter. - * - * @return integer - */ - public function getStartOffset() - { - return $this->_startOffset; - } - - /** - * Returns this Token's ending offset, one greater than the position of the - * last character corresponding to this token in the source text. - * - * @return integer - */ - public function getEndOffset() - { - return $this->_endOffset; - } - - /** - * Returns this Token's lexical type. Defaults to 'word'. - * - * @return string - */ - public function getType() - { - return $this->_type; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php deleted file mode 100644 index 9ea5125f..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php +++ /dev/null @@ -1,45 +0,0 @@ -<?php -/** - * Zend Framework - * - * LICENSE - * - * This source file is subject to version 1.0 of the Zend Framework - * license, that is bundled with this package in the file LICENSE, and - * is available through the world-wide-web at the following URL: - * http://www.zend.com/license/framework/1_0.txt. If you did not receive - * a copy of the Zend Framework license and are unable to obtain it - * through the world-wide-web, please send a note to license@zend.com - * so we can mail you a copy immediately. - * - * @package Zend_Search_Lucene - * @subpackage Analysis - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ - - -/** Zend_Search_Lucene_Analysis_Token */ -require_once 'Zend/Search/Lucene/Analysis/Token.php'; - - -/** - * Token filter converts (normalizes) Token ore removes it from a token stream. - * - * @package Zend_Search_Lucene - * @subpackage Analysis - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ - -abstract class Zend_Search_Lucene_Analysis_TokenFilter -{ - /** - * Normalize Token or remove it (if null is returned) - * - * @param Zend_Search_Lucene_Analysis_Token $srcToken - * @return Zend_Search_Lucene_Analysis_Token - */ - abstract public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken); -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php deleted file mode 100644 index 53585e21..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php +++ /dev/null @@ -1,55 +0,0 @@ -<?php -/** - * Zend Framework - * - * LICENSE - * - * This source file is subject to version 1.0 of the Zend Framework - * license, that is bundled with this package in the file LICENSE, and - * is available through the world-wide-web at the following URL: - * http://www.zend.com/license/framework/1_0.txt. If you did not receive - * a copy of the Zend Framework license and are unable to obtain it - * through the world-wide-web, please send a note to license@zend.com - * so we can mail you a copy immediately. - * - * @package Zend_Search_Lucene - * @subpackage Analysis - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ - - -/** Zend_Search_Lucene_Analysis_TokenFilter */ -require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php'; - - -/** - * Lower case Token filter. - * - * @package Zend_Search_Lucene - * @subpackage Analysis - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ - -class Zend_Search_Lucene_Analysis_TokenFilter_LowerCase extends Zend_Search_Lucene_Analysis_TokenFilter -{ - /** - * Normalize Token or remove it (if null is returned) - * - * @param Zend_Search_Lucene_Analysis_Token $srcToken - * @return Zend_Search_Lucene_Analysis_Token - */ - public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken) - { - $newToken = new Zend_Search_Lucene_Analysis_Token(strtolower( $srcToken->getTermText() ), - $srcToken->getStartOffset(), - $srcToken->getEndOffset(), - $srcToken->getType()); - - $newToken->setPositionIncrement($srcToken->getPositionIncrement()); - - return $newToken; - } -} - |