summaryrefslogtreecommitdiff
path: root/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer
diff options
context:
space:
mode:
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer')
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common.php73
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php76
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php43
3 files changed, 192 insertions, 0 deletions
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common.php
new file mode 100644
index 00000000..5c61e5b5
--- /dev/null
+++ b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common.php
@@ -0,0 +1,73 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to version 1.0 of the Zend Framework
+ * license, that is bundled with this package in the file LICENSE, and
+ * is available through the world-wide-web at the following URL:
+ * http://www.zend.com/license/framework/1_0.txt. If you did not receive
+ * a copy of the Zend Framework license and are unable to obtain it
+ * through the world-wide-web, please send a note to license@zend.com
+ * so we can mail you a copy immediately.
+ *
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
+
+
+/**
+ * Common implementation of the Zend_Search_Lucene_Analysis_Analyzer interface.
+ * There are several standard subclasses provided by the Zend_Search_Lucene/Analysis
+ * subpackage: Zend_Search_Lucene_Analysis_Analyzer_Common_Text, ZSearchHTMLAnalyzer, ZSearchXMLAnalyzer.
+ *
+ * @todo ZSearchHTMLAnalyzer and ZSearchXMLAnalyzer implementation
+ *
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
+ */
+abstract class Zend_Search_Lucene_Analysis_Analyzer_Common extends Zend_Search_Lucene_Analysis_Analyzer
+{
+    /**
+     * Token filters registered on this analyzer, kept in registration order.
+     * Every element is a Zend_Search_Lucene_Analysis_TokenFilter object.
+     *
+     * @var array
+     */
+    private $_filters = array();
+
+    /**
+     * Register one more Token filter on the Analyzer.
+     * The filter is appended after all previously added filters.
+     *
+     * @param Zend_Search_Lucene_Analysis_TokenFilter $filter
+     */
+    public function addFilter(Zend_Search_Lucene_Analysis_TokenFilter $filter)
+    {
+        array_push($this->_filters, $filter);
+    }
+
+    /**
+     * Pass the token through every registered filter.
+     * Filters run in the order they were added; each one receives the
+     * value returned by the filter before it.
+     *
+     * @param Zend_Search_Lucene_Analysis_Token $token
+     * @return Zend_Search_Lucene_Analysis_Token
+     */
+    public function normalize(Zend_Search_Lucene_Analysis_Token $token)
+    {
+        $filtered = $token;
+
+        foreach ($this->_filters as $tokenFilter) {
+            $filtered = $tokenFilter->normalize($filtered);
+        }
+
+        return $filtered;
+    }
+}
+
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
new file mode 100644
index 00000000..2a80c1f8
--- /dev/null
+++ b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
@@ -0,0 +1,76 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to version 1.0 of the Zend Framework
+ * license, that is bundled with this package in the file LICENSE, and
+ * is available through the world-wide-web at the following URL:
+ * http://www.zend.com/license/framework/1_0.txt. If you did not receive
+ * a copy of the Zend Framework license and are unable to obtain it
+ * through the world-wide-web, please send a note to license@zend.com
+ * so we can mail you a copy immediately.
+ *
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
+
+
+/**
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
+ */
+
+class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
+{
+    /**
+     * Tokenize text into terms.
+     *
+     * A term is a maximal run of characters accepted by ctype_alpha(); every
+     * other byte (white space, digits, punctuation, ...) acts as a separator.
+     * For each term a Zend_Search_Lucene_Analysis_Token is created from the
+     * term text, the offset of its first byte and the offset of the first
+     * byte after it. The token is then passed through normalize() and the
+     * result is appended to the returned list.
+     *
+     * Offsets are byte offsets into $data (strlen()/substr() semantics).
+     *
+     * @param string $data
+     * @return array Results of normalize() for each term, in order of appearance
+     */
+    public function tokenize($data)
+    {
+        $tokenStream = array();
+
+        // $data is not modified while scanning, so measure it only once.
+        $length   = strlen($data);
+        $position = 0;
+
+        while ($position < $length) {
+            // Skip separators, i.e. anything that is not alphabetic.
+            // Square-bracket offsets are used because the curly-brace form
+            // ($data{...}) is no longer accepted by current PHP versions.
+            while ($position < $length && !ctype_alpha($data[$position])) {
+                $position++;
+            }
+
+            $termStartPosition = $position;
+
+            // Read the term.
+            while ($position < $length && ctype_alpha($data[$position])) {
+                $position++;
+            }
+
+            // Nothing but separators was left: end of stream.
+            if ($position == $termStartPosition) {
+                break;
+            }
+
+            $token = new Zend_Search_Lucene_Analysis_Token(
+                substr($data, $termStartPosition, $position - $termStartPosition),
+                $termStartPosition,
+                $position
+            );
+            $tokenStream[] = $this->normalize($token);
+        }
+
+        return $tokenStream;
+    }
+}
+
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php
new file mode 100644
index 00000000..d77e38d5
--- /dev/null
+++ b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php
@@ -0,0 +1,43 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to version 1.0 of the Zend Framework
+ * license, that is bundled with this package in the file LICENSE, and
+ * is available through the world-wide-web at the following URL:
+ * http://www.zend.com/license/framework/1_0.txt. If you did not receive
+ * a copy of the Zend Framework license and are unable to obtain it
+ * through the world-wide-web, please send a note to license@zend.com
+ * so we can mail you a copy immediately.
+ *
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
+
+/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCase */
+require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
+
+
+/**
+ * @package Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
+ */
+
+class Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_Text
+{
+    /**
+     * Create the analyzer and register a
+     * Zend_Search_Lucene_Analysis_TokenFilter_LowerCase filter on it.
+     */
+    public function __construct()
+    {
+        $lowerCaseFilter = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCase();
+        $this->addFilter($lowerCaseFilter);
+    }
+}
+