diff options
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene.php')
| -rw-r--r-- | buildscripts/texbuilder/Zend/Search/Lucene.php | 569 | 
1 files changed, 0 insertions, 569 deletions
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene.php b/buildscripts/texbuilder/Zend/Search/Lucene.php deleted file mode 100644 index 700a8b8a..00000000 --- a/buildscripts/texbuilder/Zend/Search/Lucene.php +++ /dev/null @@ -1,569 +0,0 @@ -<?php -/** - * Zend Framework - * - * LICENSE - * - * This source file is subject to version 1.0 of the Zend Framework - * license, that is bundled with this package in the file LICENSE, and - * is available through the world-wide-web at the following URL: - * http://www.zend.com/license/framework/1_0.txt. If you did not receive - * a copy of the Zend Framework license and are unable to obtain it - * through the world-wide-web, please send a note to license@zend.com - * so we can mail you a copy immediately. - * - * @package    Zend_Search_Lucene - * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com) - * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0 - */ - - -/** Zend_Search_Lucene_Exception */ -require_once 'Zend/Search/Lucene/Exception.php'; - -/** Zend_Search_Lucene_Document */ -require_once 'Zend/Search/Lucene/Document.php'; - -/** Zend_Search_Lucene_Storage_Directory */ -require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php'; - -/** Zend_Search_Lucene_Index_Term */ -require_once 'Zend/Search/Lucene/Index/Term.php'; - -/** Zend_Search_Lucene_Index_TermInfo */ -require_once 'Zend/Search/Lucene/Index/TermInfo.php'; - -/** Zend_Search_Lucene_Index_SegmentInfo */ -require_once 'Zend/Search/Lucene/Index/SegmentInfo.php'; - -/** Zend_Search_Lucene_Index_FieldInfo */ -require_once 'Zend/Search/Lucene/Index/FieldInfo.php'; - -/** Zend_Search_Lucene_Index_Writer */ -require_once 'Zend/Search/Lucene/Index/Writer.php'; - -/** Zend_Search_Lucene_Search_QueryParser */ -require_once 'Zend/Search/Lucene/Search/QueryParser.php'; - -/** Zend_Search_Lucene_Search_QueryHit */ -require_once 'Zend/Search/Lucene/Search/QueryHit.php'; - -/** 
Zend_Search_Lucene_Search_Similarity */
require_once 'Zend/Search/Lucene/Search/Similarity.php';


/**
 * Entry point for working with a Lucene index stored in a directory.
 *
 * On construction the 'segments' file of the directory is read and one
 * Zend_Search_Lucene_Index_SegmentInfo is created per listed segment.
 * Document ids used by this class are index-wide: a document's id is its
 * position inside its segment plus the sizes of all preceding segments.
 *
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
class Zend_Search_Lucene
{
    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;

    /**
     * Whether the directory has to be closed by the destructor.
     * True only when this object created the directory adapter itself.
     *
     * @var boolean
     */
    private $_closeDirOnExit = true;

    /**
     * Writer for this index, not instantiated unless required.
     *
     * @var Zend_Search_Lucene_Index_Writer
     */
    private $_writer = null;

    /**
     * List of Zend_Search_Lucene_Index_SegmentInfo objects for this index,
     * kept in segment order with consecutive keys starting at 0.
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Number of documents in this index (sum of all segment sizes).
     *
     * @var integer
     */
    private $_docCount = 0;


    /**
     * Opens the index.
     *
     * $directory is either a path to the index folder or an already opened
     * Zend_Search_Lucene_Storage_Directory_Filesystem object. A directory
     * object passed in by the caller is not closed by the destructor.
     *
     * @param mixed   $directory path string or filesystem directory object
     * @param boolean $create    when true, a writer is instantiated in
     *                           "create" mode before the segments file is read
     * @throws Zend_Search_Lucene_Exception if no directory is given or the
     *                                      segments file has an unexpected format
     */
    public function __construct($directory = null, $create = false)
    {
        if ($directory === null) {
            // Use the exception class this file actually loads and documents.
            // NOTE(review): presumably a subclass of Zend_Search_Exception, so
            // existing catch blocks keep working - confirm in Exception.php.
            throw new Zend_Search_Lucene_Exception('No index directory specified');
        }

        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
            $this->_directory      = $directory;
            $this->_closeDirOnExit = false;
        } else {
            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
            $this->_closeDirOnExit = true;
        }

        if ($create) {
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, true);
        } else {
            $this->_writer = null;
        }

        $this->_segmentInfos = array();
        $this->_docCount     = 0;

        $segmentsFile = $this->_directory->getFileObject('segments');

        // The first int of the segments file must be the 0xFFFFFFFF marker.
        $format = $segmentsFile->readInt();
        if ($format != (int)0xFFFFFFFF) {
            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
        }

        // The version (long) and counter (int) are read only to advance the
        // file position; their values are not used here.
        $segmentsFile->readLong();
        $segmentsFile->readInt();

        $segments = $segmentsFile->readInt();

        // One (name, size) record per segment.
        for ($count = 0; $count < $segments; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();
            $this->_docCount += $segSize;

            $this->_segmentInfos[$count] =
                new Zend_Search_Lucene_Index_SegmentInfo($segName, $segSize, $this->_directory);
        }
    }


    /**
     * Object destructor: commits pending writer changes and closes the
     * directory if it was opened by this object.
     */
    public function __destruct()
    {
        $this->commit();

        if ($this->_closeDirOnExit) {
            $this->_directory->close();
        }
    }


    /**
     * Returns the Zend_Search_Lucene_Index_Writer for the index,
     * creating it on first use.
     *
     * @return Zend_Search_Lucene_Index_Writer
     */
    public function getIndexWriter()
    {
        if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory);
        }

        return $this->_writer;
    }


    /**
     * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
     *
     * @return Zend_Search_Lucene_Storage_Directory
     */
    public function getDirectory()
    {
        return $this->_directory;
    }


    /**
     * Returns the total number of documents in this index.
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }


    /**
     * Performs a query against the index and returns an array
     * of Zend_Search_Lucene_Search_QueryHit objects ordered by
     * descending score. Documents scoring 0 are not returned.
     *
     * Pending writer changes are committed before the search runs.
     *
     * @param string|Zend_Search_Lucene_Search_Query $query
     * @return array of Zend_Search_Lucene_Search_QueryHit
     * @throws Zend_Search_Lucene_Exception if $query is neither a string
     *                                      nor a query object
     */
    public function find($query)
    {
        if (is_string($query)) {
            $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
        }

        if (!$query instanceof Zend_Search_Lucene_Search_Query) {
            throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
        }

        $this->commit();

        $hits   = array();
        $scores = array();

        // Every document id in the index is scored individually.
        $docNum = $this->count();
        for ($count = 0; $count < $docNum; $count++) {
            $docScore = $query->score($count, $this);
            if ($docScore != 0) {
                $hit = new Zend_Search_Lucene_Search_QueryHit($this);
                $hit->id    = $count;
                $hit->score = $docScore;

                $hits[]   = $hit;
                $scores[] = $docScore;
            }
        }

        // $scores is the sort key; $hits is reordered alongside it.
        array_multisort($scores, SORT_DESC, SORT_REGULAR, $hits);

        return $hits;
    }


    /**
     * Returns the field names of all segments of this index, merged
     * with array_merge() in segment order.
     *
     * @param boolean $indexed passed through to each segment's getFields()
     * @return array
     */
    public function getFieldNames($indexed = false)
    {
        $result = array();
        foreach ($this->_segmentInfos as $segmentInfo) {
            $result = array_merge($result, $segmentInfo->getFields($indexed));
        }

        return $result;
    }


    /**
     * Returns a Zend_Search_Lucene_Document object for the document
     * number $id in this index.
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
     * @return Zend_Search_Lucene_Document|null null when $id is outside
     *                                          the range of known documents
     */
    public function getDocument($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        $location = $this->_locateDocument($id);
        if ($location === null) {
            /**
             * @todo exception here?
             */
            return null;
        }
        list($segmentInfo, $segmentDocId) = $location;

        // The .fdx file is stepped 8 bytes per document and yields, as a
        // long, the position of that document's field data in the .fdt file.
        $fdxFile = $segmentInfo->openCompoundFile('.fdx');
        $fdxFile->seek($segmentDocId * 8, SEEK_CUR);
        $fieldValuesPosition = $fdxFile->readLong();

        $fdtFile = $segmentInfo->openCompoundFile('.fdt');
        $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
        $fieldCount = $fdtFile->readVInt();

        $doc = new Zend_Search_Lucene_Document();
        for ($count = 0; $count < $fieldCount; $count++) {
            $fieldNum = $fdtFile->readVInt();
            $bits     = $fdtFile->readByte();

            $fieldInfo = $segmentInfo->getField($fieldNum);

            // Bit 2 of the flag byte selects binary data; otherwise text.
            if ($bits & 2) {
                $value = $fdtFile->readBinary();
            } else {
                $value = $fdtFile->readString();
            }

            // NOTE(review): bit 1 is presumably the "tokenized" flag -
            // confirm against the Zend_Search_Lucene_Field constructor.
            $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                  $value,
                                                  true,
                                                  $fieldInfo->isIndexed,
                                                  $bits & 1);

            $doc->addField($field);
        }

        return $doc;
    }


    /**
     * Returns an array of the ids of all documents which contain $term.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termDocs(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;

        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);

            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                $frqFile = $segInfo->openCompoundFile('.frq');
                $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

                // Document ids are delta-encoded. An odd value carries no
                // separate frequency; an even value is followed by one.
                $docId = 0;
                for ($count = 0; $count < $termInfo->docFreq; $count++) {
                    $docDelta = $frqFile->readVInt();
                    if ($docDelta % 2 == 1) {
                        $docId += ($docDelta - 1) / 2;
                    } else {
                        $docId += $docDelta / 2;
                        // read freq (value not needed here)
                        $frqFile->readVInt();
                    }
                    $result[] = $segmentStartDocId + $docId;
                }
            }

            // Convert segment-local ids of the next segment to index-wide ids.
            $segmentStartDocId += $segInfo->count();
        }

        return $result;
    }


    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return array
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term)
    {
        $result = array();
        $segmentStartDocId = 0;

        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);

            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                $frqFile = $segInfo->openCompoundFile('.frq');
                $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

                // First pass: segment-local doc id => term frequency.
                // An odd delta means a frequency of 1; an even delta is
                // followed by the explicit frequency.
                $freqs = array();
                $docId = 0;
                for ($count = 0; $count < $termInfo->docFreq; $count++) {
                    $docDelta = $frqFile->readVInt();
                    if ($docDelta % 2 == 1) {
                        $docId += ($docDelta - 1) / 2;
                        $freqs[$docId] = 1;
                    } else {
                        $docId += $docDelta / 2;
                        $freqs[$docId] = $frqFile->readVInt();
                    }
                }

                // Second pass: read $freq delta-encoded positions per
                // document; the running sum restarts at 0 for each document.
                $prxFile = $segInfo->openCompoundFile('.prx');
                $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
                foreach ($freqs as $docId => $freq) {
                    $termPosition = 0;
                    $positions    = array();

                    for ($count = 0; $count < $freq; $count++) {
                        $termPosition += $prxFile->readVInt();
                        $positions[]   = $termPosition;
                    }
                    $result[$segmentStartDocId + $docId] = $positions;
                }
            }

            $segmentStartDocId += $segInfo->count();
        }

        return $result;
    }


    /**
     * Returns the number of documents in this index containing the $term.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return integer
     */
    public function docFreq(Zend_Search_Lucene_Index_Term $term)
    {
        $result = 0;
        foreach ($this->_segmentInfos as $segInfo) {
            $termInfo = $segInfo->getTermInfo($term);
            // Same "term found" test as termDocs() and termPositions().
            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
                $result += $termInfo->docFreq;
            }
        }

        return $result;
    }


    /**
     * Retrieve the similarity used by the index reader.
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    public function getSimilarity()
    {
        return Zend_Search_Lucene_Search_Similarity::getDefault();
    }


    /**
     * Returns a normalization factor for "field, document" pair, as
     * reported by the segment that holds the document.
     *
     * @param integer $id
     * @param string $fieldName
     * @return mixed|null the segment's norm() result, or null when $id is
     *                    outside the range of known documents
     */
    public function norm($id, $fieldName)
    {
        $location = $this->_locateDocument($id);
        if ($location === null) {
            return null;
        }
        list($segmentInfo, $segmentDocId) = $location;

        return $segmentInfo->norm($segmentDocId, $fieldName);
    }


    /**
     * Adds a document to this index. The writer is created on demand;
     * the document becomes visible to this object after commit().
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        $this->getIndexWriter()->addDocument($document);
    }


    /**
     * Applies the segment changes reported by the writer's commit().
     *
     * The writer returns a map of segment name => segment info. A non-null
     * info is a new segment to add; a null info means the segment with that
     * name has to be dropped from this index.
     *
     * @todo delete() and undeleteAll processing.
     */
    public function commit()
    {
        if ($this->_writer === null) {
            return;
        }

        $segmentsRemoved = false;

        foreach ($this->_writer->commit() as $segmentName => $segmentInfo) {
            if ($segmentInfo !== null) {
                $this->_segmentInfos[] = $segmentInfo;
                $this->_docCount += $segmentInfo->count();
                continue;
            }

            foreach ($this->_segmentInfos as $segId => $segInfo) {
                if ($segInfo->getName() == $segmentName) {
                    // Keep _docCount equal to the sum of the remaining
                    // segment sizes; find() iterates up to that count.
                    $this->_docCount -= $segInfo->count();
                    unset($this->_segmentInfos[$segId]);
                    $segmentsRemoved = true;
                }
            }
        }

        if ($segmentsRemoved) {
            // unset() leaves gaps in the keys; restore consecutive keys so
            // the list stays a plain 0..n-1 sequence.
            $this->_segmentInfos = array_values($this->_segmentInfos);
        }
    }


    /**
     * Finds the segment holding the index-wide document id $id.
     *
     * @param integer $id
     * @return array|null array(segment info, id relative to that segment),
     *                    or null when no segment contains $id
     */
    private function _locateDocument($id)
    {
        if ($id < 0 || $id >= $this->_docCount) {
            return null;
        }

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $nextSegmentStartId = $segmentStartId + $segmentInfo->count();
            if ($id < $nextSegmentStartId) {
                return array($segmentInfo, $id - $segmentStartId);
            }
            $segmentStartId = $nextSegmentStartId;
        }

        // Reached only if _docCount exceeds the sum of the segment sizes.
        return null;
    }


    /*************************************************************************
    @todo UNIMPLEMENTED
    *************************************************************************/

    /**
     * Returns an array of all terms in this index.
     * Not implemented: always returns an empty array.
     *
     * @todo Implementation
     * @return array
     */
    public function terms()
    {
        return array();
    }


    /**
     * Returns true if any documents have been deleted from this index.
     * Not implemented: always returns false.
     *
     * @todo Implementation
     * @return boolean
     */
    public function hasDeletions()
    {
        return false;
    }


    /**
     * Deletes a document from the index.  $doc may contain a Zend_Search_Lucene_Document
     * or the number of the document to delete.
     * Not implemented: currently does nothing.
     *
     * @todo Implementation
     * @param mixed $doc
     */
    public function delete($doc)
    {}


    /**
     * Undeletes all documents currently marked as deleted in this index.
     * Not implemented: currently does nothing.
     *
     * @todo Implementation
     */
    public function undeleteAll()
    {}
}
\ No newline at end of file  | 
