summaryrefslogtreecommitdiff
path: root/buildscripts/texbuilder/Zend/Search/Lucene/Analysis
diff options
context:
space:
mode:
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Analysis')
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php94
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php170
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php45
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php55
4 files changed, 0 insertions, 364 deletions
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php
deleted file mode 100644
index 8e234c16..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php
+++ /dev/null
@@ -1,94 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Analysis
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/** Zend_Search_Lucene_Analysis_Token */
-require_once 'Zend/Search/Lucene/Analysis/Token.php';
-
-/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
-require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
-
-/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
-require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';
-
-
-
-/**
- * An Analyzer is used to analyze text.
- * It thus represents a policy for extracting index terms from text.
- *
- * Note:
- * Lucene Java implementation is oriented to streams. It provides effective work
- * with a huge documents (more then 20Mb).
- * But engine itself is not oriented such documents.
- * Thus Zend_Search_Lucene analysis API works with data strings and sets (arrays).
- *
- * @package Zend_Search_Lucene
- * @subpackage Analysis
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-abstract class Zend_Search_Lucene_Analysis_Analyzer
-{
- /**
- * The Analyzer implementation used by default.
- *
- * @var Zend_Search_Lucene_Analysis_Analyzer
- */
- static private $_defaultImpl;
-
- /**
- * Tokenize text to a terms
- * Returns array of Zend_Search_Lucene_Analysis_Token objects
- *
- * @param string $data
- * @return array
- */
- abstract public function tokenize($data);
-
-
- /**
- * Set the default Analyzer implementation used by indexing code.
- *
- * @param Zend_Search_Lucene_Analysis_Analyzer $similarity
- */
- static public function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer)
- {
- self::$_defaultImpl = $analyzer;
- }
-
-
- /**
- * Return the default Analyzer implementation used by indexing code.
- *
- * @return Zend_Search_Lucene_Analysis_Analyzer
- */
- static public function getDefault()
- {
- if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
- self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();
- }
-
- return self::$_defaultImpl;
- }
-
-}
-
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php
deleted file mode 100644
index a60d5d96..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php
+++ /dev/null
@@ -1,170 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Analysis
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/**
- * A token holding term text, start/end offsets in the source text, a lexical type and a position increment.
- * @package Zend_Search_Lucene
- * @subpackage Analysis
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-class Zend_Search_Lucene_Analysis_Token
-{
- /**
- * The text of the term.
- *
- * @var string
- */
- private $_termText;
-
- /**
- * Start in source text.
- *
- * @var integer
- */
- private $_startOffset;
-
- /**
- * End in source text.
- *
- * @var integer
- */
- private $_endOffset;
-
- /**
- * Lexical type.
- *
- * @var string
- */
- private $_type;
-
- /**
- * The position of this token relative to the previous Token.
- *
- * The default value is one.
- *
- * Some common uses for this are:
- * Set it to zero to put multiple terms in the same position. This is
- * useful if, e.g., a word has multiple stems. Searches for phrases
- * including either stem will match. In this case, all but the first stem's
- * increment should be set to zero: the increment of the first instance
- * should be one. Repeating a token with an increment of zero can also be
- * used to boost the scores of matches on that token.
- *
- * Set it to values greater than one to inhibit exact phrase matches.
- * If, for example, one does not want phrases to match across removed stop
- * words, then one could build a stop word filter that removes stop words and
- * also sets the increment to the number of stop words removed before each
- * non-stop word. Then exact phrase queries will only match when the terms
- * occur with no intervening stop words.
- *
- * @var integer
- */
- private $_positionIncrement;
-
-
- /**
- * Object constructor. The position increment is initialized to one.
- *
- * @param string $text
- * @param integer $start
- * @param integer $end
- * @param string $type
- */
- public function __construct($text, $start, $end, $type = 'word' )
- {
- $this->_termText = $text;
- $this->_startOffset = $start;
- $this->_endOffset = $end;
- $this->_type = $type;
-
- $this->_positionIncrement = 1;
- }
-
-
- /**
- * Sets the position increment of this Token.
- *
- * @param integer $positionIncrement
- */
- public function setPositionIncrement($positionIncrement)
- {
- $this->_positionIncrement = $positionIncrement;
- }
-
- /**
- * Returns the position increment of this Token.
- *
- * @return integer
- */
- public function getPositionIncrement()
- {
- return $this->_positionIncrement;
- }
-
- /**
- * Returns the Token's term text.
- *
- * @return string
- */
- public function getTermText()
- {
- return $this->_termText;
- }
-
- /**
- * Returns this Token's starting offset, the position of the first character
- * corresponding to this token in the source text.
- *
- * Note:
- * The difference between getEndOffset() and getStartOffset() may not be equal
- * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
- * by a stemmer or some other filter.
- *
- * @return integer
- */
- public function getStartOffset()
- {
- return $this->_startOffset;
- }
-
- /**
- * Returns this Token's ending offset, one greater than the position of the
- * last character corresponding to this token in the source text.
- *
- * @return integer
- */
- public function getEndOffset()
- {
- return $this->_endOffset;
- }
-
- /**
- * Returns this Token's lexical type. Defaults to 'word'.
- *
- * @return string
- */
- public function getType()
- {
- return $this->_type;
- }
-}
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php
deleted file mode 100644
index 9ea5125f..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php
+++ /dev/null
@@ -1,45 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Analysis
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/** Zend_Search_Lucene_Analysis_Token */
-require_once 'Zend/Search/Lucene/Analysis/Token.php';
-
-
-/**
- * A Token filter converts (normalizes) a Token or removes it from a token stream.
- *
- * @package Zend_Search_Lucene
- * @subpackage Analysis
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-abstract class Zend_Search_Lucene_Analysis_TokenFilter
-{
- /**
- * Normalize Token or remove it (if null is returned)
- *
- * @param Zend_Search_Lucene_Analysis_Token $srcToken
- * @return Zend_Search_Lucene_Analysis_Token|null
- */
- abstract public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken);
-}
-
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
deleted file mode 100644
index 53585e21..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
+++ /dev/null
@@ -1,55 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Analysis
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/** Zend_Search_Lucene_Analysis_TokenFilter */
-require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
-
-
-/**
- * Lower case Token filter.
- *
- * @package Zend_Search_Lucene
- * @subpackage Analysis
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-class Zend_Search_Lucene_Analysis_TokenFilter_LowerCase extends Zend_Search_Lucene_Analysis_TokenFilter
-{
- /**
- * Return a new Token with strtolower() applied to the term text; offsets, type and position increment are copied.
- * NOTE(review): strtolower() is not multibyte-aware - confirm term text is single-byte/ASCII.
- * @param Zend_Search_Lucene_Analysis_Token $srcToken
- * @return Zend_Search_Lucene_Analysis_Token
- */
- public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
- {
- $newToken = new Zend_Search_Lucene_Analysis_Token(strtolower( $srcToken->getTermText() ),
- $srcToken->getStartOffset(),
- $srcToken->getEndOffset(),
- $srcToken->getType());
-
- $newToken->setPositionIncrement($srcToken->getPositionIncrement());
-
- return $newToken;
- }
-}
-