From 702fb4a66d473fb1ee98aaa81cd26b4813279ef8 Mon Sep 17 00:00:00 2001 From: wei <> Date: Tue, 9 May 2006 11:39:11 +0000 Subject: Remove duplicate --- buildscripts/texbuilder/Zend/Search/Lucene.php | 569 ------------------------- 1 file changed, 569 deletions(-) delete mode 100644 buildscripts/texbuilder/Zend/Search/Lucene.php (limited to 'buildscripts/texbuilder/Zend/Search/Lucene.php') diff --git a/buildscripts/texbuilder/Zend/Search/Lucene.php b/buildscripts/texbuilder/Zend/Search/Lucene.php deleted file mode 100644 index 700a8b8a..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene.php +++ /dev/null @@ -1,569 +0,0 @@ -_directory = $directory; - $this->_closeDirOnExit = false; - } else { - $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory); - $this->_closeDirOnExit = true; - } - - if ($create) { - $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, true); - } else { - $this->_writer = null; - } - - $this->_segmentInfos = array(); - - $segmentsFile = $this->_directory->getFileObject('segments'); - - $format = $segmentsFile->readInt(); - - if ($format != (int)0xFFFFFFFF) { - throw new Zend_Search_Lucene_Exception('Wrong segments file format'); - } - - // read version - $segmentsFile->readLong(); - - // read counter - $segmentsFile->readInt(); - - $segments = $segmentsFile->readInt(); - - $this->_docCount = 0; - - // read segmentInfos - for ($count = 0; $count < $segments; $count++) { - $segName = $segmentsFile->readString(); - $segSize = $segmentsFile->readInt(); - $this->_docCount += $segSize; - - $this->_segmentInfos[$count] = - new Zend_Search_Lucene_Index_SegmentInfo($segName, - $segSize, - $this->_directory); - } - } - - - /** - * Object destructor - */ - public function __destruct() - { - $this->commit(); - - if ($this->_closeDirOnExit) { - $this->_directory->close(); - } - } - - /** - * Returns an instance of Zend_Search_Lucene_Index_Writer for the index - * - * @return Zend_Search_Lucene_Index_Writer - */ - public function getIndexWriter() - { - if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) { - $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory); - } - - return $this->_writer; - } - - - /** - * Returns the Zend_Search_Lucene_Storage_Directory instance for this index. - * - * @return Zend_Search_Lucene_Storage_Directory - */ - public function getDirectory() - { - return $this->_directory; - } - - - /** - * Returns the total number of documents in this index. - * - * @return integer - */ - public function count() - { - return $this->_docCount; - } - - - /** - * Performs a query against the index and returns an array - * of Zend_Search_Lucene_Search_QueryHit objects. - * Input is a string or Zend_Search_Lucene_Search_Query. - * - * @param mixed $query - * @return array ZSearchHit - */ - public function find($query) - { - if (is_string($query)) { - $query = Zend_Search_Lucene_Search_QueryParser::parse($query); - } - - if (!$query instanceof Zend_Search_Lucene_Search_Query) { - throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object'); - } - - $this->commit(); - - $hits = array(); - $scores = array(); - - $docNum = $this->count(); - for( $count=0; $count < $docNum; $count++ ) { - $docScore = $query->score( $count, $this); - if( $docScore != 0 ) { - $hit = new Zend_Search_Lucene_Search_QueryHit($this); - $hit->id = $count; - $hit->score = $docScore; - - $hits[] = $hit; - $scores[] = $docScore; - } - } - array_multisort($scores, SORT_DESC, SORT_REGULAR, $hits); - - return $hits; - } - - - /** - * Returns a list of all unique field names that exist in this index. - * - * @param boolean $indexed - * @return array - */ - public function getFieldNames($indexed = false) - { - $result = array(); - foreach( $this->_segmentInfos as $segmentInfo ) { - $result = array_merge($result, $segmentInfo->getFields($indexed)); - } - return $result; - } - - - /** - * Returns a Zend_Search_Lucene_Document object for the document - * number $id in this index. - * - * @param integer|Zend_Search_Lucene_Search_QueryHit $id - * @return Zend_Search_Lucene_Document - */ - public function getDocument($id) - { - if ($id instanceof Zend_Search_Lucene_Search_QueryHit) { - /* @var $id Zend_Search_Lucene_Search_QueryHit */ - $id = $id->id; - } - - if ($id >= $this->_docCount) { - /** - * @todo exception here? - */ - return null; - } - - $segCount = 0; - $nextSegmentStartId = $this->_segmentInfos[ 0 ]->count(); - while( $nextSegmentStartId <= $id ) { - $segCount++; - $nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count(); - } - $segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count(); - - $fdxFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdx'); - $fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR ); - $fieldValuesPosition = $fdxFile->readLong(); - - $fdtFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdt'); - $fdtFile->seek( $fieldValuesPosition, SEEK_CUR ); - $fieldCount = $fdtFile->readVInt(); - - $doc = new Zend_Search_Lucene_Document(); - for( $count = 0; $count < $fieldCount; $count++ ) { - $fieldNum = $fdtFile->readVInt(); - $bits = $fdtFile->readByte(); - - $fieldInfo = $this->_segmentInfos[ $segCount ]->getField($fieldNum); - - if( !($bits & 2) ) { // Text data - $field = new Zend_Search_Lucene_Field($fieldInfo->name, - $fdtFile->readString(), - true, - $fieldInfo->isIndexed, - $bits & 1 ); - } else { - $field = new Zend_Search_Lucene_Field($fieldInfo->name, - $fdtFile->readBinary(), - true, - $fieldInfo->isIndexed, - $bits & 1 ); - } - - $doc->addField($field); - } - - return $doc; - } - - - /** - * Returns an array of all the documents which contain term. - * - * @param Zend_Search_Lucene_Index_Term $term - * @return array - */ - public function termDocs(Zend_Search_Lucene_Index_Term $term) - { - $result = array(); - $segmentStartDocId = 0; - - foreach ($this->_segmentInfos as $segInfo) { - $termInfo = $segInfo->getTermInfo($term); - - if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) { - $segmentStartDocId += $segInfo->count(); - continue; - } - - $frqFile = $segInfo->openCompoundFile('.frq'); - $frqFile->seek($termInfo->freqPointer,SEEK_CUR); - $docId = 0; - for( $count=0; $count < $termInfo->docFreq; $count++ ) { - $docDelta = $frqFile->readVInt(); - if( $docDelta % 2 == 1 ) { - $docId += ($docDelta-1)/2; - } else { - $docId += $docDelta/2; - // read freq - $frqFile->readVInt(); - } - $result[] = $segmentStartDocId + $docId; - } - - $segmentStartDocId += $segInfo->count(); - } - - return $result; - } - - - /** - * Returns an array of all term positions in the documents. - * Return array structure: array( docId => array( pos1, pos2, ...), ...) - * - * @param Zend_Search_Lucene_Index_Term $term - * @return array - */ - public function termPositions(Zend_Search_Lucene_Index_Term $term) - { - $result = array(); - $segmentStartDocId = 0; - foreach( $this->_segmentInfos as $segInfo ) { - $termInfo = $segInfo->getTermInfo($term); - - if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) { - $segmentStartDocId += $segInfo->count(); - continue; - } - - $frqFile = $segInfo->openCompoundFile('.frq'); - $frqFile->seek($termInfo->freqPointer,SEEK_CUR); - $freqs = array(); - $docId = 0; - - for( $count = 0; $count < $termInfo->docFreq; $count++ ) { - $docDelta = $frqFile->readVInt(); - if( $docDelta % 2 == 1 ) { - $docId += ($docDelta-1)/2; - $freqs[ $docId ] = 1; - } else { - $docId += $docDelta/2; - $freqs[ $docId ] = $frqFile->readVInt(); - } - } - - $prxFile = $segInfo->openCompoundFile('.prx'); - $prxFile->seek($termInfo->proxPointer,SEEK_CUR); - foreach ($freqs as $docId => $freq) { - $termPosition = 0; - $positions = array(); - - for ($count = 0; $count < $freq; $count++ ) { - $termPosition += $prxFile->readVInt(); - $positions[] = $termPosition; - } - $result[ $segmentStartDocId + $docId ] = $positions; - } - - $segmentStartDocId += $segInfo->count(); - } - - return $result; - } - - - /** - * Returns the number of documents in this index containing the $term. - * - * @param Zend_Search_Lucene_Index_Term $term - * @return integer - */ - public function docFreq(Zend_Search_Lucene_Index_Term $term) - { - $result = 0; - foreach ($this->_segmentInfos as $segInfo) { - $termInfo = $segInfo->getTermInfo($term); - if ($termInfo !== null) { - $result += $termInfo->docFreq; - } - } - - return $result; - } - - - /** - * Retrive similarity used by index reader - * - * @return Zend_Search_Lucene_Search_Similarity - */ - public function getSimilarity() - { - return Zend_Search_Lucene_Search_Similarity::getDefault(); - } - - - /** - * Returns a normalization factor for "field, document" pair. - * - * @param integer $id - * @param string $fieldName - * @return Zend_Search_Lucene_Document - */ - public function norm( $id, $fieldName ) - { - if( $id >= $this->_docCount ) - return null; - - $segCount = 0; - $nextSegmentStartId = $this->_segmentInfos[ 0 ]->count(); - while( $nextSegmentStartId <= $id ) { - $segCount++; - $nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count(); - } - - $segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count(); - - return $this->_segmentInfos[ $segCount ]->norm($id - $segmentStartId, $fieldName); - } - - - /** - * Adds a document to this index. - * - * @param Zend_Search_Lucene_Document $document - */ - public function addDocument(Zend_Search_Lucene_Document $document) - { - if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) { - $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory); - } - - $this->_writer->addDocument($document); - } - - - /** - * Commit changes resulting from delete() or undeleteAll() operations. - * - * @todo delete() and undeleteAll processing. - */ - public function commit() - { - if ($this->_writer !== null) { - foreach ($this->_writer->commit() as $segmentName => $segmentInfo) { - if ($segmentInfo !== null) { - $this->_segmentInfos[] = $segmentInfo; - $this->_docCount += $segmentInfo->count(); - } else { - foreach ($this->_segmentInfos as $segId => $segInfo) { - if ($segInfo->getName() == $segmentName) { - unset($this->_segmentInfos[$segId]); - } - } - } - } - } - } - - - /************************************************************************* - @todo UNIMPLEMENTED - *************************************************************************/ - - /** - * Returns an array of all terms in this index. - * - * @todo Implementation - * @return array - */ - public function terms() - { - return array(); - } - - - /** - * Returns true if any documents have been deleted from this index. - * - * @todo Implementation - * @return boolean - */ - public function hasDeletions() - { - return false; - } - - - /** - * Deletes a document from the index. $doc may contain a Zend_Search_Lucene_Document - * or the number of the document to delete. - * - * @todo Implementation - * @param mixed $item_to_del - */ - public function delete($doc) - {} - - - /** - * Undeletes all documents currently marked as deleted in this index. - * - * @todo Implementation - */ - public function undeleteAll() - {} -} \ No newline at end of file -- cgit v1.2.3