From 702fb4a66d473fb1ee98aaa81cd26b4813279ef8 Mon Sep 17 00:00:00 2001 From: wei <> Date: Tue, 9 May 2006 11:39:11 +0000 Subject: Remove duplicate --- buildscripts/texbuilder/Zend/Exception.php | 28 - buildscripts/texbuilder/Zend/LICENSE.txt | 52 -- buildscripts/texbuilder/Zend/Search/Exception.php | 34 -- buildscripts/texbuilder/Zend/Search/Lucene.php | 569 --------------------- .../Zend/Search/Lucene/Analysis/Analyzer.php | 94 ---- .../Zend/Search/Lucene/Analysis/Token.php | 170 ------ .../Zend/Search/Lucene/Analysis/TokenFilter.php | 45 -- .../Lucene/Analysis/TokenFilter/LowerCase.php | 55 -- .../texbuilder/Zend/Search/Lucene/Document.php | 109 ---- .../texbuilder/Zend/Search/Lucene/Exception.php | 34 -- .../texbuilder/Zend/Search/Lucene/Field.php | 134 ----- .../Zend/Search/Lucene/Index/FieldInfo.php | 43 -- .../Zend/Search/Lucene/Index/SegmentInfo.php | 412 --------------- .../Zend/Search/Lucene/Index/SegmentWriter.php | 491 ------------------ .../texbuilder/Zend/Search/Lucene/Index/Term.php | 70 --- .../Zend/Search/Lucene/Index/TermInfo.php | 77 --- .../texbuilder/Zend/Search/Lucene/Index/Writer.php | 308 ----------- .../texbuilder/Zend/Search/Lucene/Search/Query.php | 98 ---- .../Zend/Search/Lucene/Search/Query/MultiTerm.php | 437 ---------------- .../Zend/Search/Lucene/Search/Query/Phrase.php | 424 --------------- .../Zend/Search/Lucene/Search/Query/Term.php | 126 ----- .../Zend/Search/Lucene/Search/QueryHit.php | 106 ---- .../Zend/Search/Lucene/Search/QueryParser.php | 140 ----- .../Zend/Search/Lucene/Search/QueryToken.php | 102 ---- .../Zend/Search/Lucene/Search/QueryTokenizer.php | 162 ------ .../Zend/Search/Lucene/Search/Similarity.php | 551 -------------------- .../Search/Lucene/Search/Similarity/Default.php | 99 ---- .../Zend/Search/Lucene/Search/Weight.php | 59 --- .../Zend/Search/Lucene/Search/Weight/MultiTerm.php | 133 ----- .../Zend/Search/Lucene/Search/Weight/Phrase.php | 138 ----- .../Zend/Search/Lucene/Search/Weight/Term.php | 144 ------ .../Zend/Search/Lucene/Storage/Directory.php | 118 ----- .../Search/Lucene/Storage/Directory/Filesystem.php | 269 ---------- .../texbuilder/Zend/Search/Lucene/Storage/File.php | 376 -------------- .../Zend/Search/Lucene/Storage/File/Filesystem.php | 170 ------ buildscripts/texbuilder/Zend/Search/TODO.txt | 14 - 36 files changed, 6391 deletions(-) delete mode 100644 buildscripts/texbuilder/Zend/Exception.php delete mode 100644 buildscripts/texbuilder/Zend/LICENSE.txt delete mode 100644 buildscripts/texbuilder/Zend/Search/Exception.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Token.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Document.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Exception.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Field.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Index/FieldInfo.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentWriter.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Index/Term.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Index/TermInfo.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Index/Writer.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Query.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/MultiTerm.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/Phrase.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/Term.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryHit.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryParser.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryToken.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryTokenizer.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Similarity.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Similarity/Default.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/MultiTerm.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/Phrase.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/Term.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Storage/Directory.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Storage/Directory/Filesystem.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Storage/File.php delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene/Storage/File/Filesystem.php delete mode 100644 buildscripts/texbuilder/Zend/Search/TODO.txt (limited to 'buildscripts/texbuilder') diff --git a/buildscripts/texbuilder/Zend/Exception.php b/buildscripts/texbuilder/Zend/Exception.php deleted file mode 100644 index ab5e4e95..00000000 --- a/buildscripts/texbuilder/Zend/Exception.php +++ /dev/null @@ -1,28 +0,0 @@ -_directory = $directory; - $this->_closeDirOnExit = false; - } else { - $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory); - $this->_closeDirOnExit = true; - } - - if ($create) { - $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, true); - } else { - $this->_writer = null; - } - - $this->_segmentInfos = array(); - - $segmentsFile = $this->_directory->getFileObject('segments'); - - $format = $segmentsFile->readInt(); - - if ($format != (int)0xFFFFFFFF) { - throw new Zend_Search_Lucene_Exception('Wrong segments file format'); - } - - // read version - $segmentsFile->readLong(); - - // read counter - $segmentsFile->readInt(); - - $segments = $segmentsFile->readInt(); - - $this->_docCount = 0; - - // read segmentInfos - for ($count = 0; $count < $segments; $count++) { - $segName = $segmentsFile->readString(); - $segSize = $segmentsFile->readInt(); - $this->_docCount += $segSize; - - $this->_segmentInfos[$count] = - new Zend_Search_Lucene_Index_SegmentInfo($segName, - $segSize, - $this->_directory); - } - } - - - /** - * Object destructor - */ - public function __destruct() - { - $this->commit(); - - if ($this->_closeDirOnExit) { - $this->_directory->close(); - } - } - - /** - * Returns an instance of Zend_Search_Lucene_Index_Writer for the index - * - * @return Zend_Search_Lucene_Index_Writer - */ - public function getIndexWriter() - { - if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) { - $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory); - } - - return $this->_writer; - } - - - /** - * Returns the Zend_Search_Lucene_Storage_Directory instance for this index. - * - * @return Zend_Search_Lucene_Storage_Directory - */ - public function getDirectory() - { - return $this->_directory; - } - - - /** - * Returns the total number of documents in this index. - * - * @return integer - */ - public function count() - { - return $this->_docCount; - } - - - /** - * Performs a query against the index and returns an array - * of Zend_Search_Lucene_Search_QueryHit objects. - * Input is a string or Zend_Search_Lucene_Search_Query. - * - * @param mixed $query - * @return array ZSearchHit - */ - public function find($query) - { - if (is_string($query)) { - $query = Zend_Search_Lucene_Search_QueryParser::parse($query); - } - - if (!$query instanceof Zend_Search_Lucene_Search_Query) { - throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object'); - } - - $this->commit(); - - $hits = array(); - $scores = array(); - - $docNum = $this->count(); - for( $count=0; $count < $docNum; $count++ ) { - $docScore = $query->score( $count, $this); - if( $docScore != 0 ) { - $hit = new Zend_Search_Lucene_Search_QueryHit($this); - $hit->id = $count; - $hit->score = $docScore; - - $hits[] = $hit; - $scores[] = $docScore; - } - } - array_multisort($scores, SORT_DESC, SORT_REGULAR, $hits); - - return $hits; - } - - - /** - * Returns a list of all unique field names that exist in this index. - * - * @param boolean $indexed - * @return array - */ - public function getFieldNames($indexed = false) - { - $result = array(); - foreach( $this->_segmentInfos as $segmentInfo ) { - $result = array_merge($result, $segmentInfo->getFields($indexed)); - } - return $result; - } - - - /** - * Returns a Zend_Search_Lucene_Document object for the document - * number $id in this index. - * - * @param integer|Zend_Search_Lucene_Search_QueryHit $id - * @return Zend_Search_Lucene_Document - */ - public function getDocument($id) - { - if ($id instanceof Zend_Search_Lucene_Search_QueryHit) { - /* @var $id Zend_Search_Lucene_Search_QueryHit */ - $id = $id->id; - } - - if ($id >= $this->_docCount) { - /** - * @todo exception here? - */ - return null; - } - - $segCount = 0; - $nextSegmentStartId = $this->_segmentInfos[ 0 ]->count(); - while( $nextSegmentStartId <= $id ) { - $segCount++; - $nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count(); - } - $segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count(); - - $fdxFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdx'); - $fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR ); - $fieldValuesPosition = $fdxFile->readLong(); - - $fdtFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdt'); - $fdtFile->seek( $fieldValuesPosition, SEEK_CUR ); - $fieldCount = $fdtFile->readVInt(); - - $doc = new Zend_Search_Lucene_Document(); - for( $count = 0; $count < $fieldCount; $count++ ) { - $fieldNum = $fdtFile->readVInt(); - $bits = $fdtFile->readByte(); - - $fieldInfo = $this->_segmentInfos[ $segCount ]->getField($fieldNum); - - if( !($bits & 2) ) { // Text data - $field = new Zend_Search_Lucene_Field($fieldInfo->name, - $fdtFile->readString(), - true, - $fieldInfo->isIndexed, - $bits & 1 ); - } else { - $field = new Zend_Search_Lucene_Field($fieldInfo->name, - $fdtFile->readBinary(), - true, - $fieldInfo->isIndexed, - $bits & 1 ); - } - - $doc->addField($field); - } - - return $doc; - } - - - /** - * Returns an array of all the documents which contain term. - * - * @param Zend_Search_Lucene_Index_Term $term - * @return array - */ - public function termDocs(Zend_Search_Lucene_Index_Term $term) - { - $result = array(); - $segmentStartDocId = 0; - - foreach ($this->_segmentInfos as $segInfo) { - $termInfo = $segInfo->getTermInfo($term); - - if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) { - $segmentStartDocId += $segInfo->count(); - continue; - } - - $frqFile = $segInfo->openCompoundFile('.frq'); - $frqFile->seek($termInfo->freqPointer,SEEK_CUR); - $docId = 0; - for( $count=0; $count < $termInfo->docFreq; $count++ ) { - $docDelta = $frqFile->readVInt(); - if( $docDelta % 2 == 1 ) { - $docId += ($docDelta-1)/2; - } else { - $docId += $docDelta/2; - // read freq - $frqFile->readVInt(); - } - $result[] = $segmentStartDocId + $docId; - } - - $segmentStartDocId += $segInfo->count(); - } - - return $result; - } - - - /** - * Returns an array of all term positions in the documents. - * Return array structure: array( docId => array( pos1, pos2, ...), ...) - * - * @param Zend_Search_Lucene_Index_Term $term - * @return array - */ - public function termPositions(Zend_Search_Lucene_Index_Term $term) - { - $result = array(); - $segmentStartDocId = 0; - foreach( $this->_segmentInfos as $segInfo ) { - $termInfo = $segInfo->getTermInfo($term); - - if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) { - $segmentStartDocId += $segInfo->count(); - continue; - } - - $frqFile = $segInfo->openCompoundFile('.frq'); - $frqFile->seek($termInfo->freqPointer,SEEK_CUR); - $freqs = array(); - $docId = 0; - - for( $count = 0; $count < $termInfo->docFreq; $count++ ) { - $docDelta = $frqFile->readVInt(); - if( $docDelta % 2 == 1 ) { - $docId += ($docDelta-1)/2; - $freqs[ $docId ] = 1; - } else { - $docId += $docDelta/2; - $freqs[ $docId ] = $frqFile->readVInt(); - } - } - - $prxFile = $segInfo->openCompoundFile('.prx'); - $prxFile->seek($termInfo->proxPointer,SEEK_CUR); - foreach ($freqs as $docId => $freq) { - $termPosition = 0; - $positions = array(); - - for ($count = 0; $count < $freq; $count++ ) { - $termPosition += $prxFile->readVInt(); - $positions[] = $termPosition; - } - $result[ $segmentStartDocId + $docId ] = $positions; - } - - $segmentStartDocId += $segInfo->count(); - } - - return $result; - } - - - /** - * Returns the number of documents in this index containing the $term. - * - * @param Zend_Search_Lucene_Index_Term $term - * @return integer - */ - public function docFreq(Zend_Search_Lucene_Index_Term $term) - { - $result = 0; - foreach ($this->_segmentInfos as $segInfo) { - $termInfo = $segInfo->getTermInfo($term); - if ($termInfo !== null) { - $result += $termInfo->docFreq; - } - } - - return $result; - } - - - /** - * Retrive similarity used by index reader - * - * @return Zend_Search_Lucene_Search_Similarity - */ - public function getSimilarity() - { - return Zend_Search_Lucene_Search_Similarity::getDefault(); - } - - - /** - * Returns a normalization factor for "field, document" pair. - * - * @param integer $id - * @param string $fieldName - * @return Zend_Search_Lucene_Document - */ - public function norm( $id, $fieldName ) - { - if( $id >= $this->_docCount ) - return null; - - $segCount = 0; - $nextSegmentStartId = $this->_segmentInfos[ 0 ]->count(); - while( $nextSegmentStartId <= $id ) { - $segCount++; - $nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count(); - } - - $segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count(); - - return $this->_segmentInfos[ $segCount ]->norm($id - $segmentStartId, $fieldName); - } - - - /** - * Adds a document to this index. - * - * @param Zend_Search_Lucene_Document $document - */ - public function addDocument(Zend_Search_Lucene_Document $document) - { - if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) { - $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory); - } - - $this->_writer->addDocument($document); - } - - - /** - * Commit changes resulting from delete() or undeleteAll() operations. - * - * @todo delete() and undeleteAll processing. - */ - public function commit() - { - if ($this->_writer !== null) { - foreach ($this->_writer->commit() as $segmentName => $segmentInfo) { - if ($segmentInfo !== null) { - $this->_segmentInfos[] = $segmentInfo; - $this->_docCount += $segmentInfo->count(); - } else { - foreach ($this->_segmentInfos as $segId => $segInfo) { - if ($segInfo->getName() == $segmentName) { - unset($this->_segmentInfos[$segId]); - } - } - } - } - } - } - - - /************************************************************************* - @todo UNIMPLEMENTED - *************************************************************************/ - - /** - * Returns an array of all terms in this index. - * - * @todo Implementation - * @return array - */ - public function terms() - { - return array(); - } - - - /** - * Returns true if any documents have been deleted from this index. - * - * @todo Implementation - * @return boolean - */ - public function hasDeletions() - { - return false; - } - - - /** - * Deletes a document from the index. $doc may contain a Zend_Search_Lucene_Document - * or the number of the document to delete. - * - * @todo Implementation - * @param mixed $item_to_del - */ - public function delete($doc) - {} - - - /** - * Undeletes all documents currently marked as deleted in this index. - * - * @todo Implementation - */ - public function undeleteAll() - {} -} \ No newline at end of file diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php deleted file mode 100644 index 8e234c16..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/Analyzer.php +++ /dev/null @@ -1,94 +0,0 @@ -_termText = $text; - $this->_startOffset = $start; - $this->_endOffset = $end; - $this->_type = $type; - - $this->_positionIncrement = 1; - } - - - /** - * positionIncrement setter - * - * @param integer $positionIncrement - */ - public function setPositionIncrement($positionIncrement) - { - $this->_positionIncrement = $positionIncrement; - } - - /** - * Returns the position increment of this Token. - * - * @return integer - */ - public function getPositionIncrement() - { - return $this->_positionIncrement; - } - - /** - * Returns the Token's term text. - * - * @return string - */ - public function getTermText() - { - return $this->_termText; - } - - /** - * Returns this Token's starting offset, the position of the first character - * corresponding to this token in the source text. - * - * Note: - * The difference between getEndOffset() and getStartOffset() may not be equal - * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered - * by a stemmer or some other filter. - * - * @return integer - */ - public function getStartOffset() - { - return $this->_startOffset; - } - - /** - * Returns this Token's ending offset, one greater than the position of the - * last character corresponding to this token in the source text. - * - * @return integer - */ - public function getEndOffset() - { - return $this->_endOffset; - } - - /** - * Returns this Token's lexical type. Defaults to 'word'. - * - * @return string - */ - public function getType() - { - return $this->_type; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php b/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php deleted file mode 100644 index 9ea5125f..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Analysis/TokenFilter.php +++ /dev/null @@ -1,45 +0,0 @@ -getTermText() ), - $srcToken->getStartOffset(), - $srcToken->getEndOffset(), - $srcToken->getType()); - - $newToken->setPositionIncrement($srcToken->getPositionIncrement()); - - return $newToken; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Document.php b/buildscripts/texbuilder/Zend/Search/Lucene/Document.php deleted file mode 100644 index 29c0c2d9..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Document.php +++ /dev/null @@ -1,109 +0,0 @@ -getFieldValue($offset); - } - - - /** - * Add a field object to this document. - * - * @param Zend_Search_Lucene_Field $field - */ - public function addField(Zend_Search_Lucene_Field $field) - { - $this->_fields[$field->name] = $field; - } - - - /** - * Return an array with the names of the fields in this document. - * - * @return array - */ - public function getFieldNames() - { - return array_keys($this->_fields); - } - - - /** - * Returns Zend_Search_Lucene_Field object for a named field in this document. - * - * @param string $fieldName - * @return Zend_Search_Lucene_Field - */ - public function getField($fieldName) - { - if (!array_key_exists($fieldName, $this->_fields)) { - throw new Zend_Search_Lucene_Exception("Field name \"$fieldName\" not found in document."); - } - return $this->_fields[$fieldName]; - } - - - /** - * Returns the string value of a named field in this document. - * - * @see __get() - * @return string - */ - public function getFieldValue($fieldName) - { - return $this->getField($fieldName)->stringValue; - } - -} diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Exception.php b/buildscripts/texbuilder/Zend/Search/Lucene/Exception.php deleted file mode 100644 index 5f12c5f6..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Exception.php +++ /dev/null @@ -1,34 +0,0 @@ -name = $name; - $this->stringValue = $stringValue; - $this->isStored = $isStored; - $this->isIndexed = $isIndexed; - $this->isTokenized = $isTokenized; - $this->isBinary = $isBinary; - - $this->storeTermVector = false; - $this->boost = 1.0; - } - - - /** - * Constructs a String-valued Field that is not tokenized, but is indexed - * and stored. Useful for non-text fields, e.g. date or url. - * - * @param string $name - * @param string $value - * @return Zend_Search_Lucene_Field - */ - static public function Keyword($name, $value) - { - return new self($name, $value, true, true, false); - } - - - /** - * Constructs a String-valued Field that is not tokenized nor indexed, - * but is stored in the index, for return with hits. - * - * @param string $name - * @param string $value - * @return Zend_Search_Lucene_Field - */ - static public function UnIndexed($name, $value) - { - return new self($name, $value, true, false, false); - } - - - /** - * Constructs a Binary String valued Field that is not tokenized nor indexed, - * but is stored in the index, for return with hits. - * - * @param string $name - * @param string $value - * @return Zend_Search_Lucene_Field - */ - static public function Binary($name, $value) - { - return new self($name, $value, true, false, false, true); - } - - /** - * Constructs a String-valued Field that is tokenized and indexed, - * and is stored in the index, for return with hits. Useful for short text - * fields, like "title" or "subject". Term vector will not be stored for this field. - * - * @param string $name - * @param string $value - * @return Zend_Search_Lucene_Field - */ - static public function Text($name, $value) - { - return new self($name, $value, true, true, true); - } - - - /** - * Constructs a String-valued Field that is tokenized and indexed, - * but that is not stored in the index. - * - * @param string $name - * @param string $value - * @return Zend_Search_Lucene_Field - */ - static public function UnStored($name, $value) - { - return new self($name, $value, false, true, true); - } - -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/FieldInfo.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/FieldInfo.php deleted file mode 100644 index eaca4ecf..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/FieldInfo.php +++ /dev/null @@ -1,43 +0,0 @@ -name = $name; - $this->isIndexed = $isIndexed; - $this->number = $number; - $this->storeTermVector = $storeTermVector; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php deleted file mode 100644 index f5c596a0..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php +++ /dev/null @@ -1,412 +0,0 @@ - normVector - * normVector is a binary string. - * Each byte corresponds to an indexed document in a segment and - * encodes normalization factor (float value, encoded by - * Zend_Search_Lucene_Search_Similarity::encodeNorm()) - * - * @var array - */ - private $_norms = array(); - - /** - * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname, - * Documents count and Directory as a parameter. - * - * @param string $name - * @param integer $docCount - * @param Zend_Search_Lucene_Storage_Directory $directory - */ - public function __construct($name, $docCount, $directory) - { - $this->_name = $name; - $this->_docCount = $docCount; - $this->_directory = $directory; - $this->_termDictionary = null; - - $this->_segFiles = array(); - $cfsFile = $this->_directory->getFileObject($name . '.cfs'); - $segFilesCount = $cfsFile->readVInt(); - - for ($count = 0; $count < $segFilesCount; $count++) { - $dataOffset = $cfsFile->readLong(); - $fileName = $cfsFile->readString(); - $this->_segFiles[$fileName] = $dataOffset; - } - - $fnmFile = $this->openCompoundFile('.fnm'); - $fieldsCount = $fnmFile->readVInt(); - $fieldNames = array(); - $fieldNums = array(); - $this->_fields = array(); - for ($count=0; $count < $fieldsCount; $count++) { - $fieldName = $fnmFile->readString(); - $fieldBits = $fnmFile->readByte(); - $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName, - $fieldBits & 1, - $count, - $fieldBits & 2 ); - if ($fieldBits & 0x10) { - // norms are omitted for the indexed field - $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount); - } - - $fieldNums[$count] = $count; - $fieldNames[$count] = $fieldName; - } - array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums); - $this->_fieldsDicPositions = array_flip($fieldNums); - } - - /** - * Opens index file stoted within compound index file - * - * @param string $extension - * @throws Zend_Search_Lucene_Exception - * @return Zend_Search_Lucene_Storage_File - */ - public function openCompoundFile($extension) - { - $filename = $this->_name . $extension; - - if( !isset($this->_segFiles[ $filename ]) ) { - throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' - . $filename . ' file.' ); - } - - $file = $this->_directory->getFileObject( $this->_name.".cfs" ); - $file->seek( $this->_segFiles[ $filename ] ); - return $file; - } - - /** - * Returns field index or -1 if field is not found - * - * @param string $fieldName - * @return integer - */ - public function getFieldNum($fieldName) - { - foreach( $this->_fields as $field ) { - if( $field->name == $fieldName ) { - return $field->number; - } - } - - return -1; - } - - /** - * Returns field info for specified field - * - * @param integer $fieldNum - * @return ZSearchFieldInfo - */ - public function getField($fieldNum) - { - return $this->_fields[$fieldNum]; - } - - /** - * Returns array of fields. - * if $indexed parameter is true, then returns only indexed fields. - * - * @param boolean $indexed - * @return array - */ - public function getFields($indexed = false) - { - $result = array(); - foreach( $this->_fields as $field ) { - if( (!$indexed) || $field->isIndexed ) { - $result[ $field->name ] = $field->name; - } - } - return $result; - } - - /** - * Returns the total number of documents in this segment. - * - * @return integer - */ - public function count() - { - return $this->_docCount; - } - - - /** - * Loads Term dictionary from TermInfoIndex file - */ - protected function _loadDictionary() - { - if ($this->_termDictionary !== null) { - return; - } - - $this->_termDictionary = array(); - $this->_termDictionaryInfos = array(); - - $tiiFile = $this->openCompoundFile('.tii'); - $tiVersion = $tiiFile->readInt(); - if ($tiVersion != (int)0xFFFFFFFE) { - throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format'); - } - - $indexTermCount = $tiiFile->readLong(); - $tiiFile->readInt(); // IndexInterval - $skipInterval = $tiiFile->readInt(); - - $prevTerm = ''; - $freqPointer = 0; - $proxPointer = 0; - $indexPointer = 0; - for ($count = 0; $count < $indexTermCount; $count++) { - $termPrefixLength = $tiiFile->readVInt(); - $termSuffix = $tiiFile->readString(); - $termValue = substr( $prevTerm, 0, $termPrefixLength ) . $termSuffix; - - $termFieldNum = $tiiFile->readVInt(); - $docFreq = $tiiFile->readVInt(); - $freqPointer += $tiiFile->readVInt(); - $proxPointer += $tiiFile->readVInt(); - if( $docFreq >= $skipInterval ) { - $skipDelta = $tiiFile->readVInt(); - } else { - $skipDelta = 0; - } - - $indexPointer += $tiiFile->readVInt(); - - $this->_termDictionary[] = new Zend_Search_Lucene_Index_Term($termValue,$termFieldNum); - $this->_termDictionaryInfos[] = - new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer); - $prevTerm = $termValue; - } - } - - - /** - * Return segment name - * - * @return string - */ - public function getName() - { - return $this->_name; - } - - - /** - * Scans terms dictionary and returns term info - * - * @param Zend_Search_Lucene_Index_Term $term - * @return Zend_Search_Lucene_Index_TermInfo - */ - public function getTermInfo($term) - { - $this->_loadDictionary(); - - $searchField = $this->getFieldNum($term->field); - - if ($searchField == -1) { - return null; - } - $searchDicField = $this->_fieldsDicPositions[$searchField]; - - // search for appropriate value in dictionary - $lowIndex = 0; - $highIndex = count($this->_termDictionary)-1; - while ($highIndex >= $lowIndex) { - // $mid = ($highIndex - $lowIndex)/2; - $mid = ($highIndex + $lowIndex) >> 1; - $midTerm = $this->_termDictionary[$mid]; - - $delta = $searchDicField - $this->_fieldsDicPositions[$midTerm->field]; - if ($delta == 0) { - $delta = strcmp($term->text, $midTerm->text); - } - - if ($delta < 0) { - $highIndex = $mid-1; - } elseif ($delta > 0) { - $lowIndex = $mid+1; - } else { - return $this->_termDictionaryInfos[$mid]; // We got it! - } - } - - if ($highIndex == -1) { - // Term is out of the dictionary range - return null; - } - - $prevPosition = $highIndex; - $prevTerm = $this->_termDictionary[$prevPosition]; - $prevTermInfo = $this->_termDictionaryInfos[ $prevPosition ]; - - $tisFile = $this->openCompoundFile('.tis'); - $tiVersion = $tisFile->readInt(); - if ($tiVersion != (int)0xFFFFFFFE) { - throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format'); - } - - $termCount = $tisFile->readLong(); - $indexInterval = $tisFile->readInt(); - $skipInterval = $tisFile->readInt(); - - $tisFile->seek($prevTermInfo->indexPointer - 20 /* header size*/, SEEK_CUR); - - $termValue = $prevTerm->text; - $termFieldNum = $prevTerm->field; - $freqPointer = $prevTermInfo->freqPointer; - $proxPointer = $prevTermInfo->proxPointer; - for ($count = $prevPosition*$indexInterval + 1; - $count < $termCount && - ( $this->_fieldsDicPositions[ $termFieldNum ] < $searchDicField || - ($this->_fieldsDicPositions[ $termFieldNum ] == $searchDicField && - strcmp($termValue, $term->text) < 0) ); - $count++) { - $termPrefixLength = $tisFile->readVInt(); - $termSuffix = $tisFile->readString(); - $termFieldNum = $tisFile->readVInt(); - $termValue = substr( $termValue, 0, $termPrefixLength ) . $termSuffix; - - $docFreq = $tisFile->readVInt(); - $freqPointer += $tisFile->readVInt(); - $proxPointer += $tisFile->readVInt(); - if( $docFreq >= $skipInterval ) { - $skipOffset = $tisFile->readVInt(); - } else { - $skipOffset = 0; - } - } - - if ($termFieldNum == $searchField && $termValue == $term->text) { - return new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset); - } else { - return null; - } - } - - /** - * Returns normalization factor for specified documents - * - * @param integer $id - * @param string $fieldName - * @return string - */ - public function norm($id, $fieldName) - { - $fieldNum = $this->getFieldNum($fieldName); - - if ( !($this->_fields[$fieldNum]->isIndexed) ) { - return null; - } - - if ( !isset( $this->_norms[$fieldNum] )) { - $fFile = $this->openCompoundFile('.f' . $fieldNum); - $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount); - } - - return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum]{$id}) ); - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentWriter.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentWriter.php deleted file mode 100644 index f90d6ed3..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentWriter.php +++ /dev/null @@ -1,491 +0,0 @@ - normVector - * normVector is a binary string. - * Each byte corresponds to an indexed document in a segment and - * encodes normalization factor (float value, encoded by - * Zend_Search_Lucene_Search_Similarity::encodeNorm()) - * - * @var array - */ - private $_norms; - - - /** - * '.fdx' file - Stored Fields, the field index. - * - * @var Zend_Search_Lucene_Storage_File - */ - private $_fdxFile; - - /** - * '.fdx' file - Stored Fields, the field data. - * - * @var Zend_Search_Lucene_Storage_File - */ - private $_fdtFile; - - - /** - * Object constructor. - * - * @param Zend_Search_Lucene_Storage_Directory $directory - * @param string $name - */ - public function __construct($directory, $name) - { - $this->_directory = $directory; - $this->_name = $name; - $this->_docCount = 0; - - $this->_fields = array(); - $this->_termDocs = array(); - $this->_files = array(); - $this->_norms = array(); - - $this->_fdxFile = null; - $this->_fdtFile = null; - } - - - /** - * Add field to the segment - * - * @param Zend_Search_Lucene_Field $field - */ - private function _addFieldInfo(Zend_Search_Lucene_Field $field) - { - if (!isset($this->_fields[$field->name])) { - $this->_fields[$field->name] = - new Zend_Search_Lucene_Index_FieldInfo($field->name, - $field->isIndexed, - count($this->_fields), - $field->storeTermVector); - } else { - $this->_fields[$field->name]->isIndexed |= $field->isIndexed; - $this->_fields[$field->name]->storeTermVector |= $field->storeTermVector; - } - } - - - /** - * Adds a document to this segment. - * - * @param Zend_Search_Lucene_Document $document - * @throws Zend_Search_Lucene_Exception - */ - public function addDocument(Zend_Search_Lucene_Document $document) - { - $storedFields = array(); - - foreach ($document->getFieldNames() as $fieldName) { - $field = $document->getField($fieldName); - $this->_addFieldInfo($field); - - if ($field->storeTermVector) { - /** - * @todo term vector storing support - */ - throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.'); - } - - if ($field->isIndexed) { - if ($field->isTokenized) { - $tokenList = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue); - } else { - $tokenList = array(); - $tokenList[] = new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue)); - } - - $position = 0; - foreach ($tokenList as $token) { - $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name); - $termKey = $term->key(); - - if (!isset($this->_termDictionary[$termKey])) { - // New term - $this->_termDictionary[$termKey] = $term; - $this->_termDocs[$termKey] = array(); - $this->_termDocs[$termKey][$this->_docCount] = array(); - } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) { - // Existing term, but new term entry - $this->_termDocs[$termKey][$this->_docCount] = array(); - } - $position += $token->getPositionIncrement(); - $this->_termDocs[$termKey][$this->_docCount][] = $position; - } - } - - if ($field->isStored) { - $storedFields[] = $field; - } - } - - if (count($storedFields) != 0) { - if (!isset($this->_fdxFile)) { - $this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx'); - $this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt'); - - $this->_files[] = $this->_name . '.fdx'; - $this->_files[] = $this->_name . '.fdt'; - } - - $this->_fdxFile->writeLong($this->_fdtFile->tell()); - - $this->_fdtFile->writeVInt(count($storedFields)); - foreach ($storedFields as $field) { - $this->_fdtFile->writeVInt($this->_fields[$field->name]->number); - $this->_fdtFile->writeByte($field->isTokenized ? 0x01 : 0x00 | - $field->isBinary ? 0x02 : 0x00 | - 0x00 /* 0x04 - third bit, compressed (ZLIB) */ ); - if ($field->isBinary) { - $this->_fdtFile->writeVInt(strlen($field->stringValue)); - $this->_fdtFile->writeBytes($field->stringValue); - } else { - $this->_fdtFile->writeString($field->stringValue); - } - } - } - - $this->_docCount++; - } - - - /** - * Dump Field Info (.fnm) segment file - */ - private function _dumpFNM() - { - $fnmFile = $this->_directory->createFile($this->_name . '.fnm'); - $fnmFile->writeVInt(count($this->_fields)); - - foreach ($this->_fields as $field) { - $fnmFile->writeString($field->name); - $fnmFile->writeByte(($field->isIndexed ? 0x01 : 0x00) | - ($field->storeTermVector ? 0x02 : 0x00) | -// not supported yet 0x04 /* term positions are stored with the term vectors */ | -// not supported yet 0x08 /* term offsets are stored with the term vectors */ | -/* not supported yet */ 0x10 /* norms are omitted for the indexed field */ - ); - } - - $this->_files[] = $this->_name . '.fnm'; - } - - - /** - * Dump Term Dictionary segment file entry. - * Used to write entry to .tis or .tii files - * - * @param Zend_Search_Lucene_Storage_File $dicFile - * @param Zend_Search_Lucene_Index_Term $prevTerm - * @param Zend_Search_Lucene_Index_Term $term - * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo - * @param Zend_Search_Lucene_Index_TermInfo $termInfo - */ - private function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile, - &$prevTerm, Zend_Search_Lucene_Index_Term $term, - &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo) - { - if (isset($prevTerm) && $prevTerm->field == $term->field) { - $prefixLength = 0; - while ($prefixLength < strlen($prevTerm->text) && - $prefixLength < strlen($term->text) && - $prevTerm->text{$prefixLength} == $term->text{$prefixLength} - ) { - $prefixLength++; - } - // Write preffix length - $dicFile->writeVInt($prefixLength); - // Write suffix - $dicFile->writeString( substr($term->text, $prefixLength) ); - } else { - // Write preffix length - $dicFile->writeVInt(0); - // Write suffix - $dicFile->writeString($term->text); - } - // Write field number - $dicFile->writeVInt($term->field); - // DocFreq (the count of documents which contain the term) - $dicFile->writeVInt($termInfo->docFreq); - - $prevTerm = $term; - - if (!isset($prevTermInfo)) { - // Write FreqDelta - $dicFile->writeVInt($termInfo->freqPointer); - // Write ProxDelta - $dicFile->writeVInt($termInfo->proxPointer); - } else { - // Write FreqDelta - $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer); - // Write ProxDelta - $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer); - } - // Write SkipOffset - it's not 0 when $termInfo->docFreq > self::$skipInterval - if ($termInfo->skipOffset != 0) { - $dicFile->writeVInt($termInfo->skipOffset); - } - - $prevTermInfo = $termInfo; - } - - /** - * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files - */ - private function _dumpDictionary() - { - $tisFile = $this->_directory->createFile($this->_name . '.tis'); - $tisFile->writeInt((int)0xFFFFFFFE); - $tisFile->writeLong(count($this->_termDictionary)); - $tisFile->writeInt(self::$indexInterval); - $tisFile->writeInt(self::$skipInterval); - - $tiiFile = $this->_directory->createFile($this->_name . '.tii'); - $tiiFile->writeInt((int)0xFFFFFFFE); - $tiiFile->writeLong((int)((count($this->_termDictionary) - 1)/self::$indexInterval) + 1); - $tiiFile->writeInt(self::$indexInterval); - $tiiFile->writeInt(self::$skipInterval); - - $frqFile = $this->_directory->createFile($this->_name . '.frq'); - $prxFile = $this->_directory->createFile($this->_name . '.prx'); - - $termKeys = array_keys($this->_termDictionary); - sort($termKeys, SORT_STRING); - - $termCount = 0; - - $prevTerm = null; - $prevTermInfo = null; - $prevIndexTerm = null; - $prevIndexTermInfo = null; - $prevIndexPosition = 0; - - foreach ($termKeys as $termId) { - $freqPointer = $frqFile->tell(); - $proxPointer = $prxFile->tell(); - - $prevDoc = 0; - foreach ($this->_termDocs[$termId] as $docId => $termPositions) { - $docDelta = ($docId - $prevDoc)*2; - $prevDoc = $docId; - if (count($termPositions) > 1) { - $frqFile->writeVInt($docDelta); - $frqFile->writeVInt(count($termPositions)); - } else { - $frqFile->writeVInt($docDelta + 1); - } - - $prevPosition = 0; - foreach ($termPositions as $position) { - $prxFile->writeVInt($position - $prevPosition); - $prevPosition = $position; - } - } - - if (count($this->_termDocs[$termId]) >= self::$skipInterval) { - /** - * @todo Write Skip Data to a freq file. - * It's not used now, but must be implemented to be compatible with Lucene - */ - $skipOffset = $frqFile->tell() - $freqPointer; - } else { - $skipOffset = 0; - } - - $term = new Zend_Search_Lucene_Index_Term($this->_termDictionary[$termId]->text, - $this->_fields[$this->_termDictionary[$termId]->field]->number); - $termInfo = new Zend_Search_Lucene_Index_TermInfo(count($this->_termDocs[$termId]), - $freqPointer, $proxPointer, $skipOffset); - - $this->_dumpTermDictEntry($tisFile, $prevTerm, $term, $prevTermInfo, $termInfo); - - if ($termCount % self::$indexInterval == 0) { - $this->_dumpTermDictEntry($tiiFile, $prevIndexTerm, $term, $prevIndexTermInfo, $termInfo); - - $indexPosition = $tisFile->tell(); - $tiiFile->writeVInt($indexPosition - $prevIndexPosition); - $prevIndexPosition = $indexPosition; - } - $termCount++; - } - - $this->_files[] = $this->_name . '.tis'; - $this->_files[] = $this->_name . '.tii'; - $this->_files[] = $this->_name . '.frq'; - $this->_files[] = $this->_name . '.prx'; - } - - - /** - * Generate compound index file - */ - private function _generateCFS() - { - $cfsFile = $this->_directory->createFile($this->_name . '.cfs'); - $cfsFile->writeVInt(count($this->_files)); - - $dataOffsetPointers = array(); - foreach ($this->_files as $fileName) { - $dataOffsetPointers[$fileName] = $cfsFile->tell(); - $cfsFile->writeLong(0); // write dummy data - $cfsFile->writeString($fileName); - } - - foreach ($this->_files as $fileName) { - // Get actual data offset - $dataOffset = $cfsFile->tell(); - // Seek to the data offset pointer - $cfsFile->seek($dataOffsetPointers[$fileName]); - // Write actual data offset value - $cfsFile->writeLong($dataOffset); - // Seek back to the end of file - $cfsFile->seek($dataOffset); - - $dataFile = $this->_directory->getFileObject($fileName); - $cfsFile->writeBytes($dataFile->readBytes($this->_directory->fileLength($fileName))); - - $this->_directory->deleteFile($fileName); - } - } - - - /** - * Close segment, write it to disk and return segment info - * - * @return Zend_Search_Lucene_Index_SegmentInfo - */ - public function close() - { - if ($this->_docCount == 0) { - return null; - } - - $this->_dumpFNM(); - $this->_dumpDictionary(); - - $this->_generateCFS(); - - return new Zend_Search_Lucene_Index_SegmentInfo($this->_name, - $this->_docCount, - $this->_directory); - } - -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/Term.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/Term.php deleted file mode 100644 index e30ce587..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/Term.php +++ /dev/null @@ -1,70 +0,0 @@ -field = $field; - $this->text = $text; - } - - - /** - * @todo docblock - */ - public function key() - { - return $this->field . chr(0) . $this->text; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/TermInfo.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/TermInfo.php deleted file mode 100644 index ddef721d..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/TermInfo.php +++ /dev/null @@ -1,77 +0,0 @@ -docFreq = $docFreq; - $this->freqPointer = $freqPointer; - $this->proxPointer = $proxPointer; - $this->skipOffset = $skipOffset; - $this->indexPointer = $indexPointer; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/Writer.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/Writer.php deleted file mode 100644 index da4af000..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/Writer.php +++ /dev/null @@ -1,308 +0,0 @@ -_directory = $directory; - - if ($create) { - foreach ($this->_directory->fileList() as $file) { - if ($file == 'deletable' || - $file == 'segments' || - substr($file, strlen($file)-4) == '.cfs') { - $this->_directory->deleteFile($file); - } - } - $segmentsFile = $this->_directory->createFile('segments'); - $segmentsFile->writeInt((int)0xFFFFFFFF); - // write version - $segmentsFile->writeLong(0); - // write name counter - $segmentsFile->writeInt(0); - // write segment counter - $segmentsFile->writeInt(0); - - $deletableFile = $this->_directory->createFile('deletable'); - // write counter - $deletableFile->writeInt(0); - - $this->_version = 0; - $this->_segmentNameCounter = 0; - $this->_segments = 0; - } else { - $segmentsFile = $this->_directory->getFileObject('segments'); - $format = $segmentsFile->readInt(); - if ($format != (int)0xFFFFFFFF) { - throw new Zend_Search_Lucene_Exception('Wrong segments file format'); - } - - // read version - $this->_version = $segmentsFile->readLong(); - // read counter - $this->_segmentNameCounter = $segmentsFile->readInt(); - // read segment counter - $this->_segments = $segmentsFile->readInt(); - } - - $this->_newSegments = array(); - $this->_currentSegment = null; - } - - /** - * Adds a document to this index. - * - * @param Zend_Search_Lucene_Document $document - */ - public function addDocument(Zend_Search_Lucene_Document $document) - { - if ($this->_currentSegment === null) { - $this->_currentSegment = - new Zend_Search_Lucene_Index_SegmentWriter($this->_directory, $this->_newSegmentName()); - } - $this->_currentSegment->addDocument($document); - $this->_version++; - } - - - - /** - * Update segments file by adding current segment to a list - * @todo !!!!!Finish the implementation - * - * @throws Zend_Search_Lucene_Exception - */ - private function _updateSegments() - { - $segmentsFile = $this->_directory->getFileObject('segments'); - $newSegmentFile = $this->_directory->createFile('segments.new'); - - $newSegmentFile->writeInt((int)0xFFFFFFFF); - $newSegmentFile->writeLong($this->_version); - $newSegmentFile->writeInt($this->_segmentNameCounter); - $newSegmentFile->writeInt($this->_segments + count($this->_newSegments)); - - $segmentsFile->seek(20); - $newSegmentFile->writeBytes($segmentsFile->readBytes($this->_directory->fileLength('segments') - 20)); - - foreach ($this->_newSegments as $segmentName => $segmentInfo) { - $newSegmentFile->writeString($segmentName); - $newSegmentFile->writeInt($segmentInfo->count()); - } - - $this->_directory->renameFile('segments.new', 'segments'); - } - - - /** - * Commit current changes - * returns array of new segments - * - * @return array - */ - public function commit() - { - if ($this->_currentSegment !== null) { - $newSegment = $this->_currentSegment->close(); - if ($newSegment !== null) { - $this->_newSegments[$newSegment->getName()] = $newSegment; - } - $this->_currentSegment = null; - } - - if (count($this->_newSegments) != 0) { - $this->_updateSegments(); - } - - $result = $this->_newSegments; - $this->_newSegments = array(); - - return $result; - } - - - /** - * Merges the provided indexes into this index. - * - * @param array $readers - * @return void - */ - public function addIndexes($readers) - { - /** - * @todo implementation - */ - } - - - /** - * Returns the number of documents currently in this index. - * - * @return integer - */ - public function docCount($readers) - { - /** - * @todo implementation - */ - } - - - /** - * Flushes all changes to an index and closes all associated files. - * - */ - public function close() - { - /** - * @todo implementation - */ - } - - - /** - * Merges all segments together into a single segment, optimizing - * an index for search. - * - * return void - */ - public function optimize() - { - /** - * @todo implementation - */ - } - - /** - * Get name for new segment - * - * @return string - */ - private function _newSegmentName() - { - return '_' . base_convert($this->_segmentNameCounter++, 10, 36); - } - -} diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query.php deleted file mode 100644 index dd8698e8..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query.php +++ /dev/null @@ -1,98 +0,0 @@ -_boost; - } - - /** - * Sets the boost for this query clause to $boost. - * - * @param float $boost - */ - public function setBoost($boost) - { - $this->_boost = $boost; - } - - /** - * Score specified document - * - * @param integer $docId - * @param Zend_Search_Lucene $reader - * @return float - */ - abstract public function score($docId, $reader); - - /** - * Constructs an appropriate Weight implementation for this query. - * - * @param Zend_Search_Lucene $reader - * @return Zend_Search_Lucene_Search_Weight - */ - abstract protected function _createWeight($reader); - - /** - * Constructs an initializes a Weight for a query. - * - * @param Zend_Search_Lucene $reader - */ - protected function _initWeight($reader) - { - $this->_weight = $this->_createWeight($reader); - $sum = $this->_weight->sumOfSquaredWeights(); - $queryNorm = $reader->getSimilarity()->queryNorm($sum); - $this->_weight->normalize($queryNorm); - } - -} \ No newline at end of file diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/MultiTerm.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/MultiTerm.php deleted file mode 100644 index 4a99c0f7..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/MultiTerm.php +++ /dev/null @@ -1,437 +0,0 @@ - (docId => array( pos1, pos2, ... ), ...) - * term2Id => (docId => array( pos1, pos2, ... ), ...) - * - * @var array - */ - private $_termsPositions = array(); - - - /** - * A score factor based on the fraction of all query terms - * that a document contains. - * float for conjunction queries - * array of float for non conjunction queries - * - * @var mixed - */ - private $_coord = null; - - - /** - * Terms weights - * array of Zend_Search_Lucene_Search_Weight - * - * @var array - */ - private $_weights = array(); - - - /** - * Class constructor. Create a new multi-term query object. - * - * @param array $terms Array of Zend_Search_Lucene_Index_Term objects - * @param array $signs Array of signs. Sign is boolean|null. - * @return void - */ - public function __construct($terms = null, $signs = null) - { - /** - * @todo Check contents of $terms and $signs before adding them. - */ - if (is_array($terms)) { - $this->_terms = $terms; - - $this->_signs = null; - // Check if all terms are required - if (is_array($signs)) { - foreach ($signs as $sign ) { - if ($sign !== true) { - $this->_signs = $signs; - continue; - } - } - } - } - } - - - /** - * Add a $term (Zend_Search_Lucene_Index_Term) to this query. - * - * The sign is specified as: - * TRUE - term is required - * FALSE - term is prohibited - * NULL - term is neither prohibited, nor required - * - * @param Zend_Search_Lucene_Index_Term $term - * @param boolean|null $sign - * @return void - */ - public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign=null) { - $this->_terms[] = $term; - - /** - * @todo This is not good. Sometimes $this->_signs is an array, sometimes - * it is null, even when there are terms. It will be changed so that - * it is always an array. - */ - if ($this->_signs === null) { - if ($sign !== null) { - $this->_signs = array(); - foreach ($this->_terms as $term) { - $this->_signs[] = null; - } - $this->_signs[] = $sign; - } - } else { - $this->_signs[] = $sign; - } - } - - - /** - * Returns query term - * - * @return array - */ - public function getTerms() - { - return $this->_terms; - } - - - /** - * Return terms signs - * - * @return array - */ - public function getSigns() - { - return $this->_signs; - } - - - /** - * Set weight for specified term - * - * @param integer $num - * @param Zend_Search_Lucene_Search_Weight_Term $weight - */ - public function setWeight($num, $weight) - { - $this->_weights[$num] = $weight; - } - - - /** - * Constructs an appropriate Weight implementation for this query. - * - * @param Zend_Search_Lucene $reader - * @return Zend_Search_Lucene_Search_Weight - */ - protected function _createWeight($reader) - { - return new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader); - } - - - /** - * Calculate result vector for Conjunction query - * (like '+something +another') - * - * @param Zend_Search_Lucene $reader - */ - private function _calculateConjunctionResult($reader) - { - if (extension_loaded('bitset')) { - foreach( $this->_terms as $termId=>$term ) { - if($this->_resVector === null) { - $this->_resVector = bitset_from_array($reader->termDocs($term)); - } else { - $this->_resVector = bitset_intersection( - $this->_resVector, - bitset_from_array($reader->termDocs($term)) ); - } - - $this->_termsPositions[$termId] = $reader->termPositions($term); - } - } else { - foreach( $this->_terms as $termId=>$term ) { - if($this->_resVector === null) { - $this->_resVector = array_flip($reader->termDocs($term)); - } else { - $termDocs = array_flip($reader->termDocs($term)); - foreach($this->_resVector as $key=>$value) { - if (!isset( $termDocs[$key] )) { - unset( $this->_resVector[$key] ); - } - } - } - - $this->_termsPositions[$termId] = $reader->termPositions($term); - } - } - } - - - /** - * Calculate result vector for non Conjunction query - * (like '+something -another') - * - * @param Zend_Search_Lucene $reader - */ - private function _calculateNonConjunctionResult($reader) - { - if (extension_loaded('bitset')) { - $required = null; - $neither = bitset_empty(); - $prohibited = bitset_empty(); - - foreach ($this->_terms as $termId => $term) { - $termDocs = bitset_from_array($reader->termDocs($term)); - - if ($this->_signs[$termId] === true) { - // required - if ($required !== null) { - $required = bitset_intersection($required, $termDocs); - } else { - $required = $termDocs; - } - } elseif ($this->_signs[$termId] === false) { - // prohibited - $prohibited = bitset_union($prohibited, $termDocs); - } else { - // neither required, nor prohibited - $neither = bitset_union($neither, $termDocs); - } - - $this->_termsPositions[$termId] = $reader->termPositions($term); - } - - if ($required === null) { - $required = $neither; - } - $this->_resVector = bitset_intersection( $required, - bitset_invert($prohibited, $reader->count()) ); - } else { - $required = null; - $neither = array(); - $prohibited = array(); - - foreach ($this->_terms as $termId => $term) { - $termDocs = array_flip($reader->termDocs($term)); - - if ($this->_signs[$termId] === true) { - // required - if ($required !== null) { - // substitute for bitset_intersection - foreach ($required as $key => $value) { - if (!isset( $termDocs[$key] )) { - unset($required[$key]); - } - } - } else { - $required = $termDocs; - } - } elseif ($this->_signs[$termId] === false) { - // prohibited - // substitute for bitset_union - foreach ($termDocs as $key => $value) { - $prohibited[$key] = $value; - } - } else { - // neither required, nor prohibited - // substitute for bitset_union - foreach ($termDocs as $key => $value) { - $neither[$key] = $value; - } - } - - $this->_termsPositions[$termId] = $reader->termPositions($term); - } - - if ($required === null) { - $required = $neither; - } - - foreach ($required as $key=>$value) { - if (isset( $prohibited[$key] )) { - unset($required[$key]); - } - } - $this->_resVector = $required; - } - } - - - /** - * Score calculator for conjunction queries (all terms are required) - * - * @param integer $docId - * @param Zend_Search_Lucene $reader - * @return float - */ - public function _conjunctionScore($docId, $reader) - { - if ($this->_coord === null) { - $this->_coord = $reader->getSimilarity()->coord(count($this->_terms), - count($this->_terms) ); - } - - $score = 0.0; - - foreach ($this->_terms as $termId=>$term) { - $score += $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) ) * - $this->_weights[$termId]->getValue() * - $reader->norm($docId, $term->field); - } - - return $score * $this->_coord; - } - - - /** - * Score calculator for non conjunction queries (not all terms are required) - * - * @param integer $docId - * @param Zend_Search_Lucene $reader - * @return float - */ - public function _nonConjunctionScore($docId, $reader) - { - if ($this->_coord === null) { - $this->_coord = array(); - - $maxCoord = 0; - foreach ($this->_signs as $sign) { - if ($sign !== false /* not prohibited */) { - $maxCoord++; - } - } - - for ($count = 0; $count <= $maxCoord; $count++) { - $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord); - } - } - - $score = 0.0; - $matchedTerms = 0; - foreach ($this->_terms as $termId=>$term) { - // Check if term is - if ($this->_signs[$termId] !== false && // not prohibited - isset($this->_termsPositions[$termId][$docId]) // matched - ) { - $matchedTerms++; - $score += - $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) ) * - $this->_weights[$termId]->getValue() * - $reader->norm($docId, $term->field); - } - } - - return $score * $this->_coord[$matchedTerms]; - } - - /** - * Score specified document - * - * @param integer $docId - * @param Zend_Search_Lucene $reader - * @return float - */ - public function score($docId, $reader) - { - if($this->_resVector === null) { - if ($this->_signs === null) { - $this->_calculateConjunctionResult($reader); - } else { - $this->_calculateNonConjunctionResult($reader); - } - - $this->_initWeight($reader); - } - - if ( (extension_loaded('bitset')) ? - bitset_in($this->_resVector, $docId) : - isset($this->_resVector[$docId]) ) { - if ($this->_signs === null) { - return $this->_conjunctionScore($docId, $reader); - } else { - return $this->_nonConjunctionScore($docId, $reader); - } - } else { - return 0; - } - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/Phrase.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/Phrase.php deleted file mode 100644 index 3e52666b..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/Phrase.php +++ /dev/null @@ -1,424 +0,0 @@ - (docId => array( pos1, pos2, ... ), ...) - * term2Id => (docId => array( pos1, pos2, ... ), ...) - * - * @var array - */ - private $_termsPositions = array(); - - /** - * Class constructor. Create a new prase query. - * - * @param string $field Field to search. - * @param array $terms Terms to search Array of strings. - * @param array $offsets Relative term positions. Array of integers. - * @throws Zend_Search_Lucene_Exception - */ - public function __construct($terms = null, $offsets = null, $field = null) - { - $this->_slop = 0; - - if (is_array($terms)) { - $this->_terms = array(); - foreach ($terms as $termId => $termText) { - $this->_terms[$termId] = ($field !== null)? new Zend_Search_Lucene_Index_Term($termText, $field): - new Zend_Search_Lucene_Index_Term($termText); - } - } else if ($terms === null) { - $this->_terms = array(); - } else { - throw new Zend_Search_Lucene_Exception('terms argument must be array of strings or null'); - } - - if (is_array($offsets)) { - if (count($this->_terms) != count($offsets)) { - throw new Zend_Search_Lucene_Exception('terms and offsets arguments must have the same size.'); - } - $this->_offsets = $offsets; - } else if ($offsets === null) { - $this->_offsets = array(); - foreach ($this->_terms as $termId => $term) { - $position = count($this->_offsets); - $this->_offsets[$termId] = $position; - } - } else { - throw new Zend_Search_Lucene_Exception('offsets argument must be array of strings or null'); - } - } - - /** - * Set slop - * - * @param integer $slop - */ - public function setSlop($slop) - { - $this->_slop = $slop; - } - - - /** - * Get slop - * - * @return integer - */ - public function getSlop() - { - return $this->_slop; - } - - - /** - * Adds a term to the end of the query phrase. - * The relative position of the term is specified explicitly or the one immediately - * after the last term added. - * - * @param Zend_Search_Lucene_Index_Term $term - * @param integer $position - */ - public function addTerm(Zend_Search_Lucene_Index_Term $term, $position = null) { - if ((count($this->_terms) != 0)&&(end($this->_terms)->field != $term->field)) { - throw new Zend_Search_Lucene_Exception('All phrase terms must be in the same field: ' . - $term->field . ':' . $term->text); - } - - $this->_terms[] = $term; - if ($position !== null) { - $this->_offsets[] = $position; - } else if (count($this->_offsets) != 0) { - $this->_offsets[] = end($this->_offsets) + 1; - } else { - $this->_offsets[] = 0; - } - } - - - /** - * Returns query term - * - * @return array - */ - public function getTerms() - { - return $this->_terms; - } - - - /** - * Set weight for specified term - * - * @param integer $num - * @param Zend_Search_Lucene_Search_Weight_Term $weight - */ - public function setWeight($num, $weight) - { - $this->_weights[$num] = $weight; - } - - - /** - * Constructs an appropriate Weight implementation for this query. - * - * @param Zend_Search_Lucene $reader - * @return Zend_Search_Lucene_Search_Weight - */ - protected function _createWeight($reader) - { - return new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader); - } - - - /** - * Calculate result vector - * - * @param Zend_Search_Lucene $reader - */ - private function _calculateResult($reader) - { - if (extension_loaded('bitset')) { - foreach( $this->_terms as $termId=>$term ) { - if($this->_resVector === null) { - $this->_resVector = bitset_from_array($reader->termDocs($term)); - } else { - $this->_resVector = bitset_intersection( - $this->_resVector, - bitset_from_array($reader->termDocs($term)) ); - } - - $this->_termsPositions[$termId] = $reader->termPositions($term); - } - } else { - foreach( $this->_terms as $termId=>$term ) { - if($this->_resVector === null) { - $this->_resVector = array_flip($reader->termDocs($term)); - } else { - $termDocs = array_flip($reader->termDocs($term)); - foreach($this->_resVector as $key=>$value) { - if (!isset( $termDocs[$key] )) { - unset( $this->_resVector[$key] ); - } - } - } - - $this->_termsPositions[$termId] = $reader->termPositions($term); - } - } - } - - - /** - * Score calculator for exact phrase queries (terms sequence is fixed) - * - * @param integer $docId - * @return float - */ - public function _exactPhraseFreq($docId) - { - $freq = 0; - - // Term Id with lowest cardinality - $lowCardTermId = null; - - // Calculate $lowCardTermId - foreach ($this->_terms as $termId => $term) { - if ($lowCardTermId === null || - count($this->_termsPositions[$termId][$docId]) < - count($this->_termsPositions[$lowCardTermId][$docId]) ) { - $lowCardTermId = $termId; - } - } - - // Walk through positions of the term with lowest cardinality - foreach ($this->_termsPositions[$lowCardTermId][$docId] as $lowCardPos) { - // We expect phrase to be found - $freq++; - - // Walk through other terms - foreach ($this->_terms as $termId => $term) { - if ($termId != $lowCardTermId) { - $expectedPosition = $lowCardPos + - ($this->_offsets[$termId] - - $this->_offsets[$lowCardTermId]); - - if (!in_array($expectedPosition, $this->_termsPositions[$termId][$docId])) { - $freq--; // Phrase wasn't found. - break; - } - } - } - } - - return $freq; - } - - /** - * Score calculator for sloppy phrase queries (terms sequence is fixed) - * - * @param integer $docId - * @param Zend_Search_Lucene $reader - * @return float - */ - public function _sloppyPhraseFreq($docId, Zend_Search_Lucene $reader) - { - $freq = 0; - - $phraseQueue = array(); - $phraseQueue[0] = array(); // empty phrase - $lastTerm = null; - - // Walk through the terms to create phrases. - foreach ($this->_terms as $termId => $term) { - $queueSize = count($phraseQueue); - $firstPass = true; - - // Walk through the term positions. - // Each term position produces a set of phrases. - foreach ($this->_termsPositions[$termId][$docId] as $termPosition ) { - if ($firstPass) { - for ($count = 0; $count < $queueSize; $count++) { - $phraseQueue[$count][$termId] = $termPosition; - } - } else { - for ($count = 0; $count < $queueSize; $count++) { - if ($lastTerm !== null && - abs( $termPosition - $phraseQueue[$count][$lastTerm] - - ($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) { - continue; - } - - $newPhraseId = count($phraseQueue); - $phraseQueue[$newPhraseId] = $phraseQueue[$count]; - $phraseQueue[$newPhraseId][$termId] = $termPosition; - } - - } - - $firstPass = false; - } - $lastTerm = $termId; - } - - - foreach ($phraseQueue as $phrasePos) { - $minDistance = null; - - for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) { - $distance = 0; - $start = reset($phrasePos) - reset($this->_offsets) + $shift; - - foreach ($this->_terms as $termId => $term) { - $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start); - - if($distance > $this->_slop) { - break; - } - } - - if ($minDistance === null || $distance < $minDistance) { - $minDistance = $distance; - } - } - - if ($minDistance <= $this->_slop) { - $freq += $reader->getSimilarity()->sloppyFreq($minDistance); - } - } - - return $freq; - } - - - /** - * Score specified document - * - * @param integer $docId - * @param Zend_Search_Lucene $reader - * @return float - */ - public function score($docId, $reader) - { - // optimize zero-term case - if (count($this->_terms) == 0) { - return 0; - } - - if($this->_resVector === null) { - $this->_calculateResult($reader); - $this->_initWeight($reader); - } - - if ( (extension_loaded('bitset')) ? - bitset_in($this->_resVector, $docId) : - isset($this->_resVector[$docId]) ) { - if ($this->_slop == 0) { - $freq = $this->_exactPhraseFreq($docId); - } else { - $freq = $this->_sloppyPhraseFreq($docId, $reader); - } - -/* - return $reader->getSimilarity()->tf($freq) * - $this->_weight->getValue() * - $reader->norm($docId, reset($this->_terms)->field); -*/ - if ($freq != 0) { - $tf = $reader->getSimilarity()->tf($freq); - $weight = $this->_weight->getValue(); - $norm = $reader->norm($docId, reset($this->_terms)->field); - - return $tf*$weight*$norm; - } - } else { - return 0; - } - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/Term.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/Term.php deleted file mode 100644 index d622f845..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Query/Term.php +++ /dev/null @@ -1,126 +0,0 @@ - array( pos1, pos2, ... ) - * - * @var array - */ - private $_termPositions; - - - /** - * Zend_Search_Lucene_Search_Query_Term constructor - * - * @param Zend_Search_Lucene_Index_Term $term - * @param boolean $sign - */ - public function __construct( $term, $sign = true ) - { - $this->_term = $term; - $this->_sign = $sign; - } - - - /** - * Constructs an appropriate Weight implementation for this query. - * - * @param Zend_Search_Lucene $reader - * @return Zend_Search_Lucene_Search_Weight - */ - protected function _createWeight($reader) - { - return new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader); - } - - /** - * Score specified document - * - * @param integer $docId - * @param Zend_Search_Lucene $reader - * @return float - */ - public function score( $docId, $reader ) - { - if($this->_docVector===null) { - if (extension_loaded('bitset')) { - $this->_docVector = bitset_from_array( $reader->termDocs($this->_term) ); - } else { - $this->_docVector = array_flip($reader->termDocs($this->_term)); - } - - $this->_termPositions = $reader->termPositions($this->_term); - $this->_initWeight($reader); - } - - $match = extension_loaded('bitset') ? bitset_in($this->_docVector, $docId) : - isset($this->_docVector[$docId]); - if ($this->_sign && $match) { - return $reader->getSimilarity()->tf(count($this->_termPositions[$docId]) ) * - $this->_weight->getValue() * - $reader->norm($docId, $this->_term->field); - } else { - return 0; - } - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryHit.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryHit.php deleted file mode 100644 index 65290a9e..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryHit.php +++ /dev/null @@ -1,106 +0,0 @@ -_index = $index; - } - - - /** - * Convenience function for getting fields from the document - * associated with this hit. - * - * @param string $offset - * @return string - */ - public function __get($offset) - { - return $this->getDocument()->getFieldValue($offset); - } - - - /** - * Return the document object for this hit - * - * @return Zend_Search_Lucene_Document - */ - public function getDocument() - { - if (!$this->_document instanceof Zend_Search_Lucene_Document) { - $this->_document = $this->_index->getDocument($this->id); - } - - return $this->_document; - } - - - /** - * Return the index object for this hit - * - * @return Zend_Search_Lucene - */ - public function getIndex() - { - return $this->_index; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryParser.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryParser.php deleted file mode 100644 index 9387afca..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryParser.php +++ /dev/null @@ -1,140 +0,0 @@ -count()) { - throw new Zend_Search_Lucene_Exception('Syntax error: query string cannot be empty.'); - } - - // Term query - if ($tokens->count() == 1) { - if ($tokens->current()->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD) { - return new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($tokens->current()->text, 'contents')); - } else { - throw new Zend_Search_Lucene_Exception('Syntax error: query string must contain at least one word.'); - } - } - - - /** - * MultiTerm Query - * - * Process each token that was returned by the tokenizer. - */ - $terms = array(); - $signs = array(); - $prevToken = null; - $openBrackets = 0; - $field = 'contents'; - foreach ($tokens as $token) { - switch ($token->type) { - case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD: - $terms[] = new Zend_Search_Lucene_Index_Term($token->text, $field); - $field = 'contents'; - if ($prevToken !== null && - $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) { - if ($prevToken->text == "+") { - $signs[] = true; - } else { - $signs[] = false; - } - } else { - $signs[] = null; - } - break; - case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN: - if ($prevToken !== null && - $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) { - throw new Zend_Search_Lucene_Exception('Syntax error: sign operator must be followed by a word.'); - } - break; - case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_FIELD: - $field = $token->text; - // let previous token to be signed as next $prevToken - $token = $prevToken; - break; - case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_BRACKET: - $token->text=='(' ? $openBrackets++ : $openBrackets--; - } - $prevToken = $token; - } - - // Finish up parsing: check the last token in the query for an opening sign or parenthesis. - if ($prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) { - throw new Zend_Search_Lucene_Exception('Syntax Error: sign operator must be followed by a word.'); - } - - // Finish up parsing: check that every opening bracket has a matching closing bracket. - if ($openBrackets != 0) { - throw new Zend_Search_Lucene_Exception('Syntax Error: mismatched parentheses, every opening must have closing.'); - } - - switch (count($terms)) { - case 0: - throw new Zend_Search_Lucene_Exception('Syntax error: bad term count.'); - case 1: - return new Zend_Search_Lucene_Search_Query_Term($terms[0],$signs[0] !== false); - default: - return new Zend_Search_Lucene_Search_Query_MultiTerm($terms,$signs); - } - } - -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryToken.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryToken.php deleted file mode 100644 index 995e0d3c..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryToken.php +++ /dev/null @@ -1,102 +0,0 @@ -type = $tokType; - $this->text = $tokText; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryTokenizer.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryTokenizer.php deleted file mode 100644 index 986f8899..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/QueryTokenizer.php +++ /dev/null @@ -1,162 +0,0 @@ -_tokens[] = new Zend_Search_Lucene_Search_QueryToken(Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD, - $currentToken); - $currentToken = ''; - } - - if ($inputString{$count} == '+' || $inputString{$count} == '-') { - $this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN, - $inputString{$count}); - } elseif ($inputString{$count} == '(' || $inputString{$count} == ')') { - $this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(Zend_Search_Lucene_Search_QueryToken::TOKTYPE_BRACKET, - $inputString{$count}); - } elseif ($inputString{$count} == ':' && $this->count()) { - if ($this->_tokens[count($this->_tokens)-1]->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD) { - $this->_tokens[count($this->_tokens)-1]->type = Zend_Search_Lucene_Search_QueryToken::TOKTYPE_FIELD; - } - } - } - } - - if (strlen($currentToken)) { - $this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD, $currentToken); - } - } - - - /** - * Returns number of tokens - * - * @return integer - */ - public function count() - { - return count($this->_tokens); - } - - - /** - * Returns TRUE if a token exists at the current position. - * - * @return boolean - */ - public function valid() - { - return $this->_currToken < $this->count(); - } - - - /** - * Resets token stream. - * - * @return integer - */ - public function rewind() - { - $this->_currToken = 0; - } - - - /** - * Returns the token at the current position or FALSE if - * the position does not contain a valid token. - * - * @return mixed - */ - public function current() - { - return $this->valid() ? $this->_tokens[$this->_currToken] : false; - } - - - /** - * Returns next token - * - * @return Zend_Search_Lucene_Search_QueryToken - */ - public function next() - { - return ++$this->_currToken; - } - - - /** - * Return the position of the current token. - * - * @return integer - */ - public function key() - { - return $this->_currToken; - } - -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Similarity.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Similarity.php deleted file mode 100644 index 8b758213..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Similarity.php +++ /dev/null @@ -1,551 +0,0 @@ - 0.0, - 1 => 5.820766E-10, - 2 => 6.9849193E-10, - 3 => 8.1490725E-10, - 4 => 9.313226E-10, - 5 => 1.1641532E-9, - 6 => 1.3969839E-9, - 7 => 1.6298145E-9, - 8 => 1.8626451E-9, - 9 => 2.3283064E-9, - 10 => 2.7939677E-9, - 11 => 3.259629E-9, - 12 => 3.7252903E-9, - 13 => 4.656613E-9, - 14 => 5.5879354E-9, - 15 => 6.519258E-9, - 16 => 7.4505806E-9, - 17 => 9.313226E-9, - 18 => 1.1175871E-8, - 19 => 1.3038516E-8, - 20 => 1.4901161E-8, - 21 => 1.8626451E-8, - 22 => 2.2351742E-8, - 23 => 2.6077032E-8, - 24 => 2.9802322E-8, - 25 => 3.7252903E-8, - 26 => 4.4703484E-8, - 27 => 5.2154064E-8, - 28 => 5.9604645E-8, - 29 => 7.4505806E-8, - 30 => 8.940697E-8, - 31 => 1.0430813E-7, - 32 => 1.1920929E-7, - 33 => 1.4901161E-7, - 34 => 1.7881393E-7, - 35 => 2.0861626E-7, - 36 => 2.3841858E-7, - 37 => 2.9802322E-7, - 38 => 3.5762787E-7, - 39 => 4.172325E-7, - 40 => 4.7683716E-7, - 41 => 5.9604645E-7, - 42 => 7.1525574E-7, - 43 => 8.34465E-7, - 44 => 9.536743E-7, - 45 => 1.1920929E-6, - 46 => 1.4305115E-6, - 47 => 1.66893E-6, - 48 => 1.9073486E-6, - 49 => 2.3841858E-6, - 50 => 2.861023E-6, - 51 => 3.33786E-6, - 52 => 3.8146973E-6, - 53 => 4.7683716E-6, - 54 => 5.722046E-6, - 55 => 6.67572E-6, - 56 => 7.6293945E-6, - 57 => 9.536743E-6, - 58 => 1.1444092E-5, - 59 => 1.335144E-5, - 60 => 1.5258789E-5, - 61 => 1.9073486E-5, - 62 => 2.2888184E-5, - 63 => 2.670288E-5, - 64 => 3.0517578E-5, - 65 => 3.8146973E-5, - 66 => 4.5776367E-5, - 67 => 5.340576E-5, - 68 => 6.1035156E-5, - 69 => 7.6293945E-5, - 70 => 9.1552734E-5, - 71 => 1.0681152E-4, - 72 => 1.2207031E-4, - 73 => 1.5258789E-4, - 74 => 1.8310547E-4, - 75 => 2.1362305E-4, - 76 => 2.4414062E-4, - 77 => 3.0517578E-4, - 78 => 3.6621094E-4, - 79 => 4.272461E-4, - 80 => 4.8828125E-4, - 81 => 6.1035156E-4, - 82 => 7.324219E-4, - 83 => 8.544922E-4, - 84 => 9.765625E-4, - 85 => 0.0012207031, - 86 => 0.0014648438, - 87 => 0.0017089844, - 88 => 0.001953125, - 89 => 0.0024414062, - 90 => 0.0029296875, - 91 => 0.0034179688, - 92 => 0.00390625, - 93 => 0.0048828125, - 94 => 0.005859375, - 95 => 0.0068359375, - 96 => 0.0078125, - 97 => 0.009765625, - 98 => 0.01171875, - 99 => 0.013671875, - 100 => 0.015625, - 101 => 0.01953125, - 102 => 0.0234375, - 103 => 0.02734375, - 104 => 0.03125, - 105 => 0.0390625, - 106 => 0.046875, - 107 => 0.0546875, - 108 => 0.0625, - 109 => 0.078125, - 110 => 0.09375, - 111 => 0.109375, - 112 => 0.125, - 113 => 0.15625, - 114 => 0.1875, - 115 => 0.21875, - 116 => 0.25, - 117 => 0.3125, - 118 => 0.375, - 119 => 0.4375, - 120 => 0.5, - 121 => 0.625, - 122 => 0.75, - 123 => 0.875, - 124 => 1.0, - 125 => 1.25, - 126 => 1.5, - 127 => 1.75, - 128 => 2.0, - 129 => 2.5, - 130 => 3.0, - 131 => 3.5, - 132 => 4.0, - 133 => 5.0, - 134 => 6.0, - 135 => 7.0, - 136 => 8.0, - 137 => 10.0, - 138 => 12.0, - 139 => 14.0, - 140 => 16.0, - 141 => 20.0, - 142 => 24.0, - 143 => 28.0, - 144 => 32.0, - 145 => 40.0, - 146 => 48.0, - 147 => 56.0, - 148 => 64.0, - 149 => 80.0, - 150 => 96.0, - 151 => 112.0, - 152 => 128.0, - 153 => 160.0, - 154 => 192.0, - 155 => 224.0, - 156 => 256.0, - 157 => 320.0, - 158 => 384.0, - 159 => 448.0, - 160 => 512.0, - 161 => 640.0, - 162 => 768.0, - 163 => 896.0, - 164 => 1024.0, - 165 => 1280.0, - 166 => 1536.0, - 167 => 1792.0, - 168 => 2048.0, - 169 => 2560.0, - 170 => 3072.0, - 171 => 3584.0, - 172 => 4096.0, - 173 => 5120.0, - 174 => 6144.0, - 175 => 7168.0, - 176 => 8192.0, - 177 => 10240.0, - 178 => 12288.0, - 179 => 14336.0, - 180 => 16384.0, - 181 => 20480.0, - 182 => 24576.0, - 183 => 28672.0, - 184 => 32768.0, - 185 => 40960.0, - 186 => 49152.0, - 187 => 57344.0, - 188 => 65536.0, - 189 => 81920.0, - 190 => 98304.0, - 191 => 114688.0, - 192 => 131072.0, - 193 => 163840.0, - 194 => 196608.0, - 195 => 229376.0, - 196 => 262144.0, - 197 => 327680.0, - 198 => 393216.0, - 199 => 458752.0, - 200 => 524288.0, - 201 => 655360.0, - 202 => 786432.0, - 203 => 917504.0, - 204 => 1048576.0, - 205 => 1310720.0, - 206 => 1572864.0, - 207 => 1835008.0, - 208 => 2097152.0, - 209 => 2621440.0, - 210 => 3145728.0, - 211 => 3670016.0, - 212 => 4194304.0, - 213 => 5242880.0, - 214 => 6291456.0, - 215 => 7340032.0, - 216 => 8388608.0, - 217 => 1.048576E7, - 218 => 1.2582912E7, - 219 => 1.4680064E7, - 220 => 1.6777216E7, - 221 => 2.097152E7, - 222 => 2.5165824E7, - 223 => 2.9360128E7, - 224 => 3.3554432E7, - 225 => 4.194304E7, - 226 => 5.0331648E7, - 227 => 5.8720256E7, - 228 => 6.7108864E7, - 229 => 8.388608E7, - 230 => 1.00663296E8, - 231 => 1.17440512E8, - 232 => 1.34217728E8, - 233 => 1.6777216E8, - 234 => 2.01326592E8, - 235 => 2.34881024E8, - 236 => 2.68435456E8, - 237 => 3.3554432E8, - 238 => 4.02653184E8, - 239 => 4.69762048E8, - 240 => 5.3687091E8, - 241 => 6.7108864E8, - 242 => 8.0530637E8, - 243 => 9.395241E8, - 244 => 1.07374182E9, - 245 => 1.34217728E9, - 246 => 1.61061274E9, - 247 => 1.87904819E9, - 248 => 2.14748365E9, - 249 => 2.68435456E9, - 250 => 3.22122547E9, - 251 => 3.75809638E9, - 252 => 4.2949673E9, - 253 => 5.3687091E9, - 254 => 6.4424509E9, - 255 => 7.5161928E9 ); - - - /** - * Set the default Similarity implementation used by indexing and search - * code. - * - * @param Zend_Search_Lucene_Search_Similarity $similarity - */ - static public function setDefault(Zend_Search_Lucene_Search_Similarity $similarity) - { - self::$_defaultImpl = $similarity; - } - - - /** - * Return the default Similarity implementation used by indexing and search - * code. - * - * @return Zend_Search_Lucene_Search_Similarity - */ - static public function getDefault() - { - if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) { - self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default(); - } - - return self::$_defaultImpl; - } - - - /** - * Computes the normalization value for a field given the total number of - * terms contained in a field. These values, together with field boosts, are - * stored in an index and multipled into scores for hits on each field by the - * search code. - * - * Matches in longer fields are less precise, so implemenations of this - * method usually return smaller values when 'numTokens' is large, - * and larger values when 'numTokens' is small. - * - * That these values are computed under - * IndexWriter::addDocument(Document) and stored then using - * encodeNorm(float). Thus they have limited precision, and documents - * must be re-indexed if this method is altered. - * - * fieldName - name of field - * numTokens - the total number of tokens contained in fields named - * 'fieldName' of 'doc'. - * Returns a normalization factor for hits on this field of this document - * - * @param string $fieldName - * @param integer $numTokens - * @return float - */ - abstract public function lengthNorm($fieldName, $numTokens); - - /** - * Computes the normalization value for a query given the sum of the squared - * weights of each of the query terms. This value is then multipled into the - * weight of each query term. - * - * This does not affect ranking, but rather just attempts to make scores - * from different queries comparable. - * - * sumOfSquaredWeights - the sum of the squares of query term weights - * Returns a normalization factor for query weights - * - * @param float $sumOfSquaredWeights - * @return float - */ - abstract public function queryNorm($sumOfSquaredWeights); - - - /** - * Decodes a normalization factor stored in an index. - * - * @param integer $byte - * @return float - */ - static public function decodeNorm($byte) - { - return self::$_normTable[$byte & 0xFF]; - } - - - /** - * Encodes a normalization factor for storage in an index. - * - * The encoding uses a five-bit exponent and three-bit mantissa, thus - * representing values from around 7x10^9 to 2x10^-9 with about one - * significant decimal digit of accuracy. Zero is also represented. - * Negative numbers are rounded up to zero. Values too large to represent - * are rounded down to the largest representable value. Positive values too - * small to represent are rounded up to the smallest positive representable - * value. - * - * @param float $f - * @return integer - */ - static function encodeNorm($f) - { - return self::_floatToByte($f); - } - - /** - * Float to byte conversion - * - * @param integer $b - * @return float - */ - static private function _floatToByte($f) - { - // round negatives up to zero - if ($f <= 0.0) { - return 0; - } - - // search for appropriate value - $lowIndex = 0; - $highIndex = 255; - while ($highIndex >= $lowIndex) { - // $mid = ($highIndex - $lowIndex)/2; - $mid = ($highIndex + $lowIndex) >> 1; - $delta = $f - self::$_normTable[$mid]; - - if ($delta < 0) { - $highIndex = $mid-1; - } elseif ($delta > 0) { - $lowIndex = $mid+1; - } else { - return $mid; // We got it! - } - } - - // round to closest value - if ($highIndex != 255 && - $f - self::$_normTable[$highIndex] > self::$_normTable[$highIndex+1] - $f ) { - return $highIndex + 1; - } else { - return $highIndex; - } - } - - - /** - * Computes a score factor based on a term or phrase's frequency in a - * document. This value is multiplied by the idf(Term, Searcher) - * factor for each term in the query and these products are then summed to - * form the initial score for a document. - * - * Terms and phrases repeated in a document indicate the topic of the - * document, so implementations of this method usually return larger values - * when 'freq' is large, and smaller values when 'freq' - * is small. - * - * freq - the frequency of a term within a document - * Returns a score factor based on a term's within-document frequency - * - * @param float $freq - * @return float - */ - abstract public function tf($freq); - - /** - * Computes the amount of a sloppy phrase match, based on an edit distance. - * This value is summed for each sloppy phrase match in a document to form - * the frequency that is passed to tf(float). - * - * A phrase match with a small edit distance to a document passage more - * closely matches the document, so implementations of this method usually - * return larger values when the edit distance is small and smaller values - * when it is large. - * - * distance - the edit distance of this sloppy phrase match - * Returns the frequency increment for this match - * - * @param integer $distance - * @return float - */ - abstract public function sloppyFreq($distance); - - - /** - * Computes a score factor for a simple term or a phrase. - * - * The default implementation is: - * return idfFreq(searcher.docFreq(term), searcher.maxDoc()); - * - * input - the term in question or array of terms - * reader - reader the document collection being searched - * Returns a score factor for the term - * - * @param mixed $input - * @param Zend_Search_Lucene $reader - * @return a score factor for the term - */ - public function idf($input, $reader) - { - if (!is_array($input)) { - return $this->idfFreq($reader->docFreq($input), $reader->count()); - } else { - $idf = 0.0; - foreach ($input as $term) { - $idf += $this->idfFreq($reader->docFreq($term), $reader->count()); - } - return $idf; - } - } - - /** - * Computes a score factor based on a term's document frequency (the number - * of documents which contain the term). This value is multiplied by the - * tf(int) factor for each term in the query and these products are - * then summed to form the initial score for a document. - * - * Terms that occur in fewer documents are better indicators of topic, so - * implemenations of this method usually return larger values for rare terms, - * and smaller values for common terms. - * - * docFreq - the number of documents which contain the term - * numDocs - the total number of documents in the collection - * Returns a score factor based on the term's document frequency - * - * @param integer $docFreq - * @param integer $numDocs - * @return float - */ - abstract public function idfFreq($docFreq, $numDocs); - - /** - * Computes a score factor based on the fraction of all query terms that a - * document contains. This value is multiplied into scores. - * - * The presence of a large portion of the query terms indicates a better - * match with the query, so implemenations of this method usually return - * larger values when the ratio between these parameters is large and smaller - * values when the ratio between them is small. - * - * overlap - the number of query terms matched in the document - * maxOverlap - the total number of terms in the query - * Returns a score factor based on term overlap with the query - * - * @param integer $overlap - * @param integer $maxOverlap - * @return float - */ - abstract public function coord($overlap, $maxOverlap); -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Similarity/Default.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Similarity/Default.php deleted file mode 100644 index 1551d8bd..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Similarity/Default.php +++ /dev/null @@ -1,99 +0,0 @@ -createWeight(). - * The sumOfSquaredWeights() method is then called on the top-level - * query to compute the query normalization factor Similarity->queryNorm(float). - * This factor is then passed to normalize(float). At this point the weighting - * is complete. - * - * @package Zend_Search_Lucene - * @subpackage Search - * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ -abstract class Zend_Search_Lucene_Search_Weight -{ - /** - * The weight for this query. - * - * @return float - */ - abstract public function getValue(); - - /** - * The sum of squared weights of contained query clauses. - * - * @return float - */ - abstract public function sumOfSquaredWeights(); - - /** - * Assigns the query normalization factor to this. - * - * @param $norm - */ - abstract public function normalize($norm); -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/MultiTerm.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/MultiTerm.php deleted file mode 100644 index 69528ba4..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/MultiTerm.php +++ /dev/null @@ -1,133 +0,0 @@ -_query = $query; - $this->_reader = $reader; - $this->_weights = array(); - - $signs = $query->getSigns(); - - foreach ($query->getTerms() as $num => $term) { - if ($signs === null || $signs[$num] === null || $signs[$num]) { - $this->_weights[$num] = new Zend_Search_Lucene_Search_Weight_Term($term, $query, $reader); - $query->setWeight($num, $this->_weights[$num]); - } - } - } - - - /** - * The weight for this query - * - * @return float - */ - public function getValue() - { - return $this->_query->getBoost(); - } - - - /** - * The sum of squared weights of contained query clauses. - * - * @return float - */ - public function sumOfSquaredWeights() - { - $sum = 0; - foreach ($this->_weights as $weight) { - // sum sub weights - $sum += $weight->sumOfSquaredWeights(); - } - - // boost each sub-weight - $sum *= $this->_query->getBoost() * $this->_query->getBoost(); - - // check for empty query (like '-something -another') - if ($sum == 0) { - $sum = 1.0; - } - return $sum; - } - - - /** - * Assigns the query normalization factor to this. - * - * @param float $queryNorm - */ - public function normalize($queryNorm) - { - // incorporate boost - $queryNorm *= $this->_query->getBoost(); - - foreach ($this->_weights as $weight) { - $weight->normalize($queryNorm); - } - } -} - - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/Phrase.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/Phrase.php deleted file mode 100644 index 77e94f28..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/Phrase.php +++ /dev/null @@ -1,138 +0,0 @@ -_query = $query; - $this->_reader = $reader; - } - - - /** - * The weight for this query - * - * @return float - */ - public function getValue() - { - return $this->_value; - } - - - /** - * The sum of squared weights of contained query clauses. - * - * @return float - */ - public function sumOfSquaredWeights() - { - // compute idf - $this->_idf = $this->_reader->getSimilarity()->idf($this->_query->getTerms(), $this->_reader); - - // compute query weight - $this->_queryWeight = $this->_idf * $this->_query->getBoost(); - - // square it - return $this->_queryWeight * $this->_queryWeight; - } - - - /** - * Assigns the query normalization factor to this. - * - * @param float $queryNorm - */ - public function normalize($queryNorm) - { - $this->_queryNorm = $queryNorm; - - // normalize query weight - $this->_queryWeight *= $queryNorm; - - // idf for documents - $this->_value = $this->_queryWeight * $this->_idf; - } -} - - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/Term.php b/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/Term.php deleted file mode 100644 index 3e6102f3..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Search/Weight/Term.php +++ /dev/null @@ -1,144 +0,0 @@ -_term = $term; - $this->_query = $query; - $this->_reader = $reader; - } - - - /** - * The weight for this query - * - * @return float - */ - public function getValue() - { - return $this->_value; - } - - - /** - * The sum of squared weights of contained query clauses. - * - * @return float - */ - public function sumOfSquaredWeights() - { - // compute idf - $this->_idf = $this->_reader->getSimilarity()->idf($this->_term, $this->_reader); - - // compute query weight - $this->_queryWeight = $this->_idf * $this->_query->getBoost(); - - // square it - return $this->_queryWeight * $this->_queryWeight; - } - - - /** - * Assigns the query normalization factor to this. - * - * @param float $queryNorm - */ - public function normalize($queryNorm) - { - $this->_queryNorm = $queryNorm; - - // normalize query weight - $this->_queryWeight *= $queryNorm; - - // idf for documents - $this->_value = $this->_queryWeight * $this->_idf; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Storage/Directory.php b/buildscripts/texbuilder/Zend/Search/Lucene/Storage/Directory.php deleted file mode 100644 index 48114a76..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Storage/Directory.php +++ /dev/null @@ -1,118 +0,0 @@ - Zend_Search_Lucene_Storage_File object - * - * @var array - * @throws Zend_Search_Lucene_Exception - */ - private $_fileHandlers; - - - /** - * Utility function to recursive directory creation - * - * @param string $dir - * @param integer $mode - * @param boolean $recursive - * @return boolean - */ - - static public function mkdirs($dir, $mode = 0777, $recursive = true) - { - if (is_null($dir) || $dir === '') { - return false; - } - if (is_dir($dir) || $dir === '/') { - return true; - } - if (self::mkdirs(dirname($dir), $mode, $recursive)) { - return mkdir($dir, $mode); - } - return false; - } - - - /** - * Object constructor - * Checks if $path is a directory or tries to create it. - * - * @param string $path - * @throws Zend_Search_Lucene_Exception - */ - public function __construct($path) - { - if (!is_dir($path)) { - if (file_exists($path)) { - throw new Zend_Search_Lucene_Exception('Path exists, but it\'s not a directory'); - } else { - if (!self::mkdirs($path)) { - throw new Zend_Search_Lucene_Exception("Can't create directory '$path'."); - } - } - } - $this->_dirPath = $path; - $this->_fileHandlers = array(); - } - - - /** - * Closes the store. - * - * @return void - */ - public function close() - { - foreach ($this->_fileHandlers as $fileObject) { - $fileObject->close(); - } - - unset($this->_fileHandlers); - } - - - /** - * Returns an array of strings, one for each file in the directory. - * - * @return array - */ - public function fileList() - { - $result = array(); - - $dirContent = opendir( $this->_dirPath ); - while ($file = readdir($dirContent)) { - if (($file == '..')||($file == '.')) continue; - - $fullName = $this->_dirPath . '/' . $file; - - if( !is_dir($this->_dirPath . '/' . $file) ) { - $result[] = $file; - } - } - - return $result; - } - - /** - * Creates a new, empty file in the directory with the given $filename. - * - * @param string $filename - * @return Zend_Search_Lucene_Storage_File - */ - public function createFile($filename) - { - if (isset($this->_fileHandlers[$filename])) { - $this->_fileHandlers[$filename]->close(); - } - unset($this->_fileHandlers[$filename]); - $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($this->_dirPath . '/' . $filename, 'w+b'); - return $this->_fileHandlers[$filename]; - } - - - /** - * Removes an existing $filename in the directory. - * - * @param string $filename - * @return void - */ - public function deleteFile($filename) - { - if (isset($this->_fileHandlers[$filename])) { - $this->_fileHandlers[$filename]->close(); - } - unset($this->_fileHandlers[$filename]); - unlink($this->_dirPath .'/'. $filename); - } - - - /** - * Returns true if a file with the given $filename exists. - * - * @param string $filename - * @return boolean - */ - public function fileExists($filename) - { - return file_exists($this->_dirPath .'/'. $filename); - } - - - /** - * Returns the length of a $filename in the directory. - * - * @param string $filename - * @return integer - */ - public function fileLength($filename) - { - if (isset( $this->_fileHandlers[$filename] )) { - return $this->_fileHandlers[$filename]->size(); - } - return filesize($this->_dirPath .'/'. $filename); - } - - - /** - * Returns the UNIX timestamp $filename was last modified. - * - * @param string $filename - * @return integer - */ - public function fileModified($filename) - { - return filemtime($this->_dirPath .'/'. $filename); - } - - - /** - * Renames an existing file in the directory. - * - * @param string $from - * @param string $to - * @return void - */ - public function renameFile($from, $to) - { - if ($this->_fileHandlers[$from] !== null) { - $this->_fileHandlers[$from]->close(); - } - unset($this->_fileHandlers[$from]); - - if ($this->_fileHandlers[$to] !== null) { - $this->_fileHandlers[$to]->close(); - } - unset($this->_fileHandlers[$to]); - - if (file_exists($this->_dirPath . '/' . $to)) { - unlink($this->_dirPath . '/' . $to); - } - - return @rename($this->_dirPath . '/' . $from, $this->_dirPath . '/' . $to); - } - - - /** - * Sets the modified time of $filename to now. - * - * @param string $filename - * @return void - */ - public function touchFile($filename) - { - return touch($this->_dirPath .'/'. $filename); - } - - - /** - * Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory. - * - * @param string $filename - * @return Zend_Search_Lucene_Storage_File - */ - public function getFileObject($filename) - { - if (isset( $this->_fileHandlers[$filename] )) { - $this->_fileHandlers[$filename]->seek(0); - return $this->_fileHandlers[$filename]; - } - - $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($this->_dirPath . '/' . $filename, 'rb'); - return $this->_fileHandlers[$filename]; - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Storage/File.php b/buildscripts/texbuilder/Zend/Search/Lucene/Storage/File.php deleted file mode 100644 index f62af33a..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Storage/File.php +++ /dev/null @@ -1,376 +0,0 @@ -_fread(1)); - } - - /** - * Writes a byte to the end of the file. - * - * @param integer $byte - */ - public function writeByte($byte) - { - return $this->_fwrite(chr($byte), 1); - } - - /** - * Read num bytes from the current position in the file - * and advances the file pointer. - * - * @param integer $num - * @return string - */ - public function readBytes($num) - { - return $this->_fread($num); - } - - /** - * Writes num bytes of data (all, if $num===null) to the end - * of the file. - * - * @param string $data - * @param integer $num - */ - public function writeBytes($data, $num=null) - { - $this->_fwrite($data, $num); - } - - - /** - * Reads an integer from the current position in the file - * and advances the file pointer. - * - * @return integer - */ - public function readInt() - { - $str = $this->_fread(4); - - return ord($str{0}) << 24 | - ord($str{1}) << 16 | - ord($str{2}) << 8 | - ord($str{3}); - } - - - /** - * Writes an integer to the end of file. - * - * @param integer $value - */ - public function writeInt($value) - { - settype($value, 'integer'); - $this->_fwrite( chr($value>>24 & 0xFF) . - chr($value>>16 & 0xFF) . - chr($value>>8 & 0xFF) . - chr($value & 0xFF), 4 ); - } - - - /** - * Returns a long integer from the current position in the file - * and advances the file pointer. - * - * @return integer - */ - public function readLong() - { - $str = $this->_fread(8); - - /** - * PHP uses long as largest integer. fseek() uses long for offset. - * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent - * conversion to float. - * So, largest index segment file is 2Gb - */ - return /* ord($str{0}) << 56 | */ - /* ord($str{1}) << 48 | */ - /* ord($str{2}) << 40 | */ - /* ord($str{3}) << 32 | */ - ord($str{4}) << 24 | - ord($str{5}) << 16 | - ord($str{6}) << 8 | - ord($str{7}); - } - - /** - * Writes long integer to the end of file - * - * @param integer $value - */ - public function writeLong($value) - { - /** - * PHP uses long as largest integer. fseek() uses long for offset. - * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent - * conversion to float. - * So, largest index segment file is 2Gb - */ - settype($value, 'integer'); - $this->_fwrite( "\x00\x00\x00\x00" . - chr($value>>24 & 0xFF) . - chr($value>>16 & 0xFF) . - chr($value>>8 & 0xFF) . - chr($value & 0xFF), 8 ); - } - - - - /** - * Returns a variable-length integer from the current - * position in the file and advances the file pointer. - * - * @return integer - */ - public function readVInt() - { - $nextByte = ord($this->_fread(1)); - $val = $nextByte & 0x7F; - - for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) { - $nextByte = ord($this->_fread(1)); - $val |= ($nextByte & 0x7F) << $shift; - } - return $val; - } - - /** - * Writes a variable-length integer to the end of file. - * - * @param integer $value - */ - public function writeVInt($value) - { - settype($value, 'integer'); - while ($value > 0x7F) { - $this->_fwrite(chr( ($value & 0x7F)|0x80 )); - $value >>= 7; - } - $this->_fwrite(chr($value)); - } - - - /** - * Reads a string from the current position in the file - * and advances the file pointer. - * - * @return string - */ - public function readString() - { - $strlen = $this->readVInt(); - if ($strlen == 0) { - return ''; - } else { - /** - * This implementation supports only Basic Multilingual Plane - * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support - * "supplementary characters" (characters whose code points are - * greater than 0xFFFF) - * Java 2 represents these characters as a pair of char (16-bit) - * values, the first from the high-surrogates range (0xD800-0xDBFF), - * the second from the low-surrogates range (0xDC00-0xDFFF). Then - * they are encoded as usual UTF-8 characters in six bytes. - * Standard UTF-8 representation uses four bytes for supplementary - * characters. - */ - - $str_val = $this->_fread($strlen); - - for ($count = 0; $count < $strlen; $count++ ) { - if (( ord($str_val{$count}) & 0xC0 ) == 0xC0) { - $addBytes = 1; - if (ord($str_val{$count}) & 0x20 ) { - $addBytes++; - - // Never used. Java2 doesn't encode strings in four bytes - if (ord($str_val{$count}) & 0x10 ) { - $addBytes++; - } - } - $str_val .= $this->_fread($addBytes); - $strlen += $addBytes; - - // Check for null character. Java2 encodes null character - // in two bytes. - if (ord($str_val{$count}) == 0xC0 && - ord($str_val{$count+1}) == 0x80 ) { - $str_val{$count} = 0; - $str_val = substr($str_val,0,$count+1) - . substr($str_val,$count+2); - } - $count += $addBytes; - } - } - - return $str_val; - } - } - - /** - * Writes a string to the end of file. - * - * @param string $str - * @throws Zend_Search_Lucene_Exception - */ - public function writeString($str) - { - /** - * This implementation supports only Basic Multilingual Plane - * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support - * "supplementary characters" (characters whose code points are - * greater than 0xFFFF) - * Java 2 represents these characters as a pair of char (16-bit) - * values, the first from the high-surrogates range (0xD800-0xDBFF), - * the second from the low-surrogates range (0xDC00-0xDFFF). Then - * they are encoded as usual UTF-8 characters in six bytes. - * Standard UTF-8 representation uses four bytes for supplementary - * characters. - */ - - // convert input to a string before iterating string characters - settype($str, 'string'); - - $chars = $strlen = strlen($str); - $containNullChars = false; - - for ($count = 0; $count < $strlen; $count++ ) { - /** - * String is already in Java 2 representation. - * We should only calculate actual string length and replace - * \x00 by \xC0\x80 - */ - if ((ord($str{$count}) & 0xC0) == 0xC0) { - $addBytes = 1; - if (ord($str{$count}) & 0x20 ) { - $addBytes++; - - // Never used. Java2 doesn't encode strings in four bytes - // and we dont't support non-BMP characters - if (ord($str{$count}) & 0x10 ) { - $addBytes++; - } - } - $chars -= $addBytes; - - if (ord($str{$count}) == 0 ) { - $containNullChars = true; - } - $count += $addBytes; - } - } - - if ($chars < 0) { - throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string'); - } - - $this->writeVInt($chars); - if ($containNullChars) { - $this->_fwrite(str_replace($str, "\x00", "\xC0\x80")); - } else { - $this->_fwrite($str); - } - } - - - /** - * Reads binary data from the current position in the file - * and advances the file pointer. - * - * @return string - */ - public function readBinary() - { - return $this->_fread($this->readVInt()); - } -} \ No newline at end of file diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Storage/File/Filesystem.php b/buildscripts/texbuilder/Zend/Search/Lucene/Storage/File/Filesystem.php deleted file mode 100644 index fc6adcf5..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene/Storage/File/Filesystem.php +++ /dev/null @@ -1,170 +0,0 @@ -_fileHandle = @fopen($filename, $mode); - - if ($this->_fileHandle===false) { - ini_set('track_errors', $trackErrors); - throw new Zend_Search_Lucene_Exception($php_errormsg); - } - - ini_set('track_errors', $trackErrors); - } - - - /** - * Sets the file position indicator and advances the file pointer. - * The new position, measured in bytes from the beginning of the file, - * is obtained by adding offset to the position specified by whence, - * whose values are defined as follows: - * SEEK_SET - Set position equal to offset bytes. - * SEEK_CUR - Set position to current location plus offset. - * SEEK_END - Set position to end-of-file plus offset. (To move to - * a position before the end-of-file, you need to pass a negative value - * in offset.) - * Upon success, returns 0; otherwise, returns -1 - * - * @param integer $offset - * @param integer $whence - * @return integer - */ - public function seek($offset, $whence=SEEK_SET) - { - return fseek($this->_fileHandle, $offset, $whence); - } - - - /** - * Get file position. - * - * @return integer - */ - public function tell() - { - return ftell($this->_fileHandle); - } - - - /** - * Close File object - */ - public function close() - { - if ($this->_fileHandle !== null ) { - @fclose($this->_fileHandle); - $this->_fileHandle = null; - } - } - - /** - * Get the size of the already opened file - * - * @return integer - */ - public function size() - { - $position = ftell($this->_fileHandle); - fseek($this->_fileHandle, 0, SEEK_END); - $size = ftell($this->_fileHandle); - fseek($this->_fileHandle,$position); - - return $size; - } - - /** - * Read a $length bytes from the file and advance the file pointer. - * - * @param integer $length - * @return string - */ - protected function _fread($length=1) - { - if ($length == 0) { - return ''; - } - - if ($length < 1024) { - return fread($this->_fileHandle, $length); - } - - $data = ''; - while ( $length > 0 && ($nextBlock = fread($this->_fileHandle, $length)) != false ) { - $data .= $nextBlock; - $length -= strlen($nextBlock); - } - return $data; - } - - - /** - * Writes $length number of bytes (all, if $length===null) to the end - * of the file. - * - * @param string $data - * @param integer $length - */ - protected function _fwrite($data, $length=null) - { - if ($length === null ) { - fwrite($this->_fileHandle, $data); - } else { - fwrite($this->_fileHandle, $data, $length); - } - } -} - diff --git a/buildscripts/texbuilder/Zend/Search/TODO.txt b/buildscripts/texbuilder/Zend/Search/TODO.txt deleted file mode 100644 index 06f7b487..00000000 --- a/buildscripts/texbuilder/Zend/Search/TODO.txt +++ /dev/null @@ -1,14 +0,0 @@ -@todo - -- Improve API: fix ZSearchMultiTermQuery($terms, $signs); - -- Analysis and indexing engine - -- Additional queries: phrase, wildcard, proximity, and range - -- Better class-level docblocks (most functions okay) - -- Some Windows issues(?) during indexing - -- Finish renaming classes to PEAR-like conventions - -- cgit v1.2.3