diff options
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer')
3 files changed, 192 insertions, 0 deletions
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


/** Zend_Search_Lucene_Analysis_Analyzer */
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';


/**
 * Common implementation of the Zend_Search_Lucene_Analysis_Analyzer interface.
 * There are several standard subclasses provided by the Zend_Search_Lucene/Analysis
 * subpackage: Zend_Search_Lucene_Analysis_Analyzer_Common_Text, ZSearchHTMLAnalyzer, ZSearchXMLAnalyzer.
 *
 * The class keeps an ordered list of token filters (see addFilter()) and
 * runs every token through them in registration order (see normalize()).
 *
 * @todo ZSearchHTMLAnalyzer and ZSearchXMLAnalyzer implementation
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
abstract class Zend_Search_Lucene_Analysis_Analyzer_Common extends Zend_Search_Lucene_Analysis_Analyzer
{
    /**
     * The set of Token filters applied to the Token stream.
     * Array of Zend_Search_Lucene_Analysis_TokenFilter objects,
     * kept in the order in which they were added.
     *
     * @var array
     */
    private $_filters = array();

    /**
     * Add Token filter to the Analyzer.
     *
     * The filter is appended to the end of the chain, so it runs after
     * every filter that was registered before it.
     *
     * @param Zend_Search_Lucene_Analysis_TokenFilter $filter
     */
    public function addFilter(Zend_Search_Lucene_Analysis_TokenFilter $filter)
    {
        $this->_filters[] = $filter;
    }

    /**
     * Apply the registered filters to the token, in registration order.
     *
     * Each filter receives the result of the previous one. With no filters
     * registered the token is returned unchanged.
     *
     * @param Zend_Search_Lucene_Analysis_Token $token
     * @return Zend_Search_Lucene_Analysis_Token|null the filtered token, or
     *         null as soon as any filter returns null
     */
    public function normalize(Zend_Search_Lucene_Analysis_Token $token)
    {
        foreach ($this->_filters as $filter) {
            $token = $filter->normalize($token);

            // NOTE(review): the TokenFilter contract is not visible from this
            // file; this assumes a filter may return null to discard a token
            // (as later Zend Framework releases allow). Without this guard the
            // next filter in the chain would be invoked with null.
            if ($token === null) {
                return null;
            }
        }

        return $token;
    }
}
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


/** Zend_Search_Lucene_Analysis_Analyzer_Common */
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';


/**
 * Plain-text analyzer: splits the input into runs of alphabetic characters
 * and passes every run through the inherited normalize() filter chain.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /**
     * Tokenize text into terms.
     * Returns array of Zend_Search_Lucene_Analysis_Token objects.
     *
     * A term is a maximal run of bytes for which ctype_alpha() is true;
     * every other byte (whitespace, digits, punctuation, ...) acts as a
     * separator. The scan is byte-wise, and ctype_alpha() follows the
     * current locale, so multi-byte letters are not recognised as part
     * of a term.
     *
     * Each token is built from the term text, the byte offset of its first
     * character and the byte offset just past its last character, and is
     * then passed through normalize().
     *
     * @param string $data
     * @return array
     */
    public function tokenize($data)
    {
        $tokenStream = array();

        // $data is never modified below, so its length is computed once
        // instead of on every loop-condition check.
        $length   = strlen($data);
        $position = 0;

        while ($position < $length) {
            // Skip separators, i.e. every non-alphabetic byte (not only
            // white space). Square-bracket offsets are used because the
            // curly-brace form ($data{$i}) is a parse error since PHP 8.0.
            while ($position < $length && !ctype_alpha($data[$position])) {
                $position++;
            }

            $termStartPosition = $position;

            // Read the term: consume alphabetic bytes up to the next separator.
            while ($position < $length && ctype_alpha($data[$position])) {
                $position++;
            }

            // Empty token, end of stream.
            if ($position === $termStartPosition) {
                break;
            }

            $token = new Zend_Search_Lucene_Analysis_Token(
                substr($data, $termStartPosition, $position - $termStartPosition),
                $termStartPosition,
                $position
            );

            // NOTE(review): assumes a null result from normalize() means the
            // filter chain discarded the token (the filter contract is not
            // visible from this file); such tokens are left out of the stream
            // rather than stored as null entries.
            $token = $this->normalize($token);
            if ($token !== null) {
                $tokenStream[] = $token;
            }
        }

        return $tokenStream;
    }
}
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';

/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCase */
require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';


/**
 * Text analyzer with a lower-case token filter pre-installed.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
class Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_Text
{
    /**
     * Registers a Zend_Search_Lucene_Analysis_TokenFilter_LowerCase instance
     * as a token filter of the newly created analyzer.
     */
    public function __construct()
    {
        $lowerCaseFilter = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCase();

        $this->addFilter($lowerCaseFilter);
    }
}