summaryrefslogtreecommitdiff
path: root/buildscripts/texbuilder/Zend/Search/Lucene.php
diff options
context:
space:
mode:
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene.php')
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene.php569
1 files changed, 0 insertions, 569 deletions
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene.php b/buildscripts/texbuilder/Zend/Search/Lucene.php
deleted file mode 100644
index 700a8b8a..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene.php
+++ /dev/null
@@ -1,569 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
-
-/** Zend_Search_Lucene_Document */
-require_once 'Zend/Search/Lucene/Document.php';
-
-/** Zend_Search_Lucene_Storage_Directory */
-require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
-
-/** Zend_Search_Lucene_Index_Term */
-require_once 'Zend/Search/Lucene/Index/Term.php';
-
-/** Zend_Search_Lucene_Index_TermInfo */
-require_once 'Zend/Search/Lucene/Index/TermInfo.php';
-
-/** Zend_Search_Lucene_Index_SegmentInfo */
-require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
-
-/** Zend_Search_Lucene_Index_FieldInfo */
-require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
-
-/** Zend_Search_Lucene_Index_Writer */
-require_once 'Zend/Search/Lucene/Index/Writer.php';
-
-/** Zend_Search_Lucene_Search_QueryParser */
-require_once 'Zend/Search/Lucene/Search/QueryParser.php';
-
-/** Zend_Search_Lucene_Search_QueryHit */
-require_once 'Zend/Search/Lucene/Search/QueryHit.php';
-
-/** Zend_Search_Lucene_Search_Similarity */
-require_once 'Zend/Search/Lucene/Search/Similarity.php';
-
-
-/**
- * @package Zend_Search_Lucene
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-class Zend_Search_Lucene
-{
- /**
- * File system adapter.
- *
- * @var Zend_Search_Lucene_Storage_Directory
- */
- private $_directory = null;
-
- /**
- * File system adapter closing option
- *
- * @var boolean
- */
- private $_closeDirOnExit = true;
-
- /**
- * Writer for this index, not instantiated unless required.
- *
- * @var Zend_Search_Lucene_Index_Writer
- */
- private $_writer = null;
-
- /**
- * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
- *
- * @var array Zend_Search_Lucene_Index_SegmentInfo
- */
- private $_segmentInfos = array();
-
- /**
- * Number of documents in this index.
- *
- * @var integer
- */
- private $_docCount = 0;
-
-
- /**
- * Opens the index.
- *
- * IndexReader constructor needs Directory as a parameter. It should be
- * a string with a path to the index folder or a Directory object.
- *
- * @param mixed $directory
- * @throws Zend_Search_Lucene_Exception
- */
- public function __construct($directory = null, $create = false)
- {
- if ($directory === null) {
- throw new Zend_Search_Exception('No index directory specified');
- }
-
- if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
- $this->_directory = $directory;
- $this->_closeDirOnExit = false;
- } else {
- $this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
- $this->_closeDirOnExit = true;
- }
-
- if ($create) {
- $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, true);
- } else {
- $this->_writer = null;
- }
-
- $this->_segmentInfos = array();
-
- $segmentsFile = $this->_directory->getFileObject('segments');
-
- $format = $segmentsFile->readInt();
-
- if ($format != (int)0xFFFFFFFF) {
- throw new Zend_Search_Lucene_Exception('Wrong segments file format');
- }
-
- // read version
- $segmentsFile->readLong();
-
- // read counter
- $segmentsFile->readInt();
-
- $segments = $segmentsFile->readInt();
-
- $this->_docCount = 0;
-
- // read segmentInfos
- for ($count = 0; $count < $segments; $count++) {
- $segName = $segmentsFile->readString();
- $segSize = $segmentsFile->readInt();
- $this->_docCount += $segSize;
-
- $this->_segmentInfos[$count] =
- new Zend_Search_Lucene_Index_SegmentInfo($segName,
- $segSize,
- $this->_directory);
- }
- }
-
-
- /**
- * Object destructor
- */
- public function __destruct()
- {
- $this->commit();
-
- if ($this->_closeDirOnExit) {
- $this->_directory->close();
- }
- }
-
- /**
- * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
- *
- * @return Zend_Search_Lucene_Index_Writer
- */
- public function getIndexWriter()
- {
- if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
- $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory);
- }
-
- return $this->_writer;
- }
-
-
- /**
- * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
- *
- * @return Zend_Search_Lucene_Storage_Directory
- */
- public function getDirectory()
- {
- return $this->_directory;
- }
-
-
- /**
- * Returns the total number of documents in this index.
- *
- * @return integer
- */
- public function count()
- {
- return $this->_docCount;
- }
-
-
- /**
- * Performs a query against the index and returns an array
- * of Zend_Search_Lucene_Search_QueryHit objects.
- * Input is a string or Zend_Search_Lucene_Search_Query.
- *
- * @param mixed $query
- * @return array ZSearchHit
- */
- public function find($query)
- {
- if (is_string($query)) {
- $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
- }
-
- if (!$query instanceof Zend_Search_Lucene_Search_Query) {
- throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
- }
-
- $this->commit();
-
- $hits = array();
- $scores = array();
-
- $docNum = $this->count();
- for( $count=0; $count < $docNum; $count++ ) {
- $docScore = $query->score( $count, $this);
- if( $docScore != 0 ) {
- $hit = new Zend_Search_Lucene_Search_QueryHit($this);
- $hit->id = $count;
- $hit->score = $docScore;
-
- $hits[] = $hit;
- $scores[] = $docScore;
- }
- }
- array_multisort($scores, SORT_DESC, SORT_REGULAR, $hits);
-
- return $hits;
- }
-
-
- /**
- * Returns a list of all unique field names that exist in this index.
- *
- * @param boolean $indexed
- * @return array
- */
- public function getFieldNames($indexed = false)
- {
- $result = array();
- foreach( $this->_segmentInfos as $segmentInfo ) {
- $result = array_merge($result, $segmentInfo->getFields($indexed));
- }
- return $result;
- }
-
-
- /**
- * Returns a Zend_Search_Lucene_Document object for the document
- * number $id in this index.
- *
- * @param integer|Zend_Search_Lucene_Search_QueryHit $id
- * @return Zend_Search_Lucene_Document
- */
- public function getDocument($id)
- {
- if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
- /* @var $id Zend_Search_Lucene_Search_QueryHit */
- $id = $id->id;
- }
-
- if ($id >= $this->_docCount) {
- /**
- * @todo exception here?
- */
- return null;
- }
-
- $segCount = 0;
- $nextSegmentStartId = $this->_segmentInfos[ 0 ]->count();
- while( $nextSegmentStartId <= $id ) {
- $segCount++;
- $nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count();
- }
- $segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count();
-
- $fdxFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdx');
- $fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR );
- $fieldValuesPosition = $fdxFile->readLong();
-
- $fdtFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdt');
- $fdtFile->seek( $fieldValuesPosition, SEEK_CUR );
- $fieldCount = $fdtFile->readVInt();
-
- $doc = new Zend_Search_Lucene_Document();
- for( $count = 0; $count < $fieldCount; $count++ ) {
- $fieldNum = $fdtFile->readVInt();
- $bits = $fdtFile->readByte();
-
- $fieldInfo = $this->_segmentInfos[ $segCount ]->getField($fieldNum);
-
- if( !($bits & 2) ) { // Text data
- $field = new Zend_Search_Lucene_Field($fieldInfo->name,
- $fdtFile->readString(),
- true,
- $fieldInfo->isIndexed,
- $bits & 1 );
- } else {
- $field = new Zend_Search_Lucene_Field($fieldInfo->name,
- $fdtFile->readBinary(),
- true,
- $fieldInfo->isIndexed,
- $bits & 1 );
- }
-
- $doc->addField($field);
- }
-
- return $doc;
- }
-
-
- /**
- * Returns an array of all the documents which contain term.
- *
- * @param Zend_Search_Lucene_Index_Term $term
- * @return array
- */
- public function termDocs(Zend_Search_Lucene_Index_Term $term)
- {
- $result = array();
- $segmentStartDocId = 0;
-
- foreach ($this->_segmentInfos as $segInfo) {
- $termInfo = $segInfo->getTermInfo($term);
-
- if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
- $segmentStartDocId += $segInfo->count();
- continue;
- }
-
- $frqFile = $segInfo->openCompoundFile('.frq');
- $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
- $docId = 0;
- for( $count=0; $count < $termInfo->docFreq; $count++ ) {
- $docDelta = $frqFile->readVInt();
- if( $docDelta % 2 == 1 ) {
- $docId += ($docDelta-1)/2;
- } else {
- $docId += $docDelta/2;
- // read freq
- $frqFile->readVInt();
- }
- $result[] = $segmentStartDocId + $docId;
- }
-
- $segmentStartDocId += $segInfo->count();
- }
-
- return $result;
- }
-
-
- /**
- * Returns an array of all term positions in the documents.
- * Return array structure: array( docId => array( pos1, pos2, ...), ...)
- *
- * @param Zend_Search_Lucene_Index_Term $term
- * @return array
- */
- public function termPositions(Zend_Search_Lucene_Index_Term $term)
- {
- $result = array();
- $segmentStartDocId = 0;
- foreach( $this->_segmentInfos as $segInfo ) {
- $termInfo = $segInfo->getTermInfo($term);
-
- if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
- $segmentStartDocId += $segInfo->count();
- continue;
- }
-
- $frqFile = $segInfo->openCompoundFile('.frq');
- $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
- $freqs = array();
- $docId = 0;
-
- for( $count = 0; $count < $termInfo->docFreq; $count++ ) {
- $docDelta = $frqFile->readVInt();
- if( $docDelta % 2 == 1 ) {
- $docId += ($docDelta-1)/2;
- $freqs[ $docId ] = 1;
- } else {
- $docId += $docDelta/2;
- $freqs[ $docId ] = $frqFile->readVInt();
- }
- }
-
- $prxFile = $segInfo->openCompoundFile('.prx');
- $prxFile->seek($termInfo->proxPointer,SEEK_CUR);
- foreach ($freqs as $docId => $freq) {
- $termPosition = 0;
- $positions = array();
-
- for ($count = 0; $count < $freq; $count++ ) {
- $termPosition += $prxFile->readVInt();
- $positions[] = $termPosition;
- }
- $result[ $segmentStartDocId + $docId ] = $positions;
- }
-
- $segmentStartDocId += $segInfo->count();
- }
-
- return $result;
- }
-
-
- /**
- * Returns the number of documents in this index containing the $term.
- *
- * @param Zend_Search_Lucene_Index_Term $term
- * @return integer
- */
- public function docFreq(Zend_Search_Lucene_Index_Term $term)
- {
- $result = 0;
- foreach ($this->_segmentInfos as $segInfo) {
- $termInfo = $segInfo->getTermInfo($term);
- if ($termInfo !== null) {
- $result += $termInfo->docFreq;
- }
- }
-
- return $result;
- }
-
-
- /**
- * Retrive similarity used by index reader
- *
- * @return Zend_Search_Lucene_Search_Similarity
- */
- public function getSimilarity()
- {
- return Zend_Search_Lucene_Search_Similarity::getDefault();
- }
-
-
- /**
- * Returns a normalization factor for "field, document" pair.
- *
- * @param integer $id
- * @param string $fieldName
- * @return Zend_Search_Lucene_Document
- */
- public function norm( $id, $fieldName )
- {
- if( $id >= $this->_docCount )
- return null;
-
- $segCount = 0;
- $nextSegmentStartId = $this->_segmentInfos[ 0 ]->count();
- while( $nextSegmentStartId <= $id ) {
- $segCount++;
- $nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count();
- }
-
- $segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count();
-
- return $this->_segmentInfos[ $segCount ]->norm($id - $segmentStartId, $fieldName);
- }
-
-
- /**
- * Adds a document to this index.
- *
- * @param Zend_Search_Lucene_Document $document
- */
- public function addDocument(Zend_Search_Lucene_Document $document)
- {
- if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
- $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory);
- }
-
- $this->_writer->addDocument($document);
- }
-
-
- /**
- * Commit changes resulting from delete() or undeleteAll() operations.
- *
- * @todo delete() and undeleteAll processing.
- */
- public function commit()
- {
- if ($this->_writer !== null) {
- foreach ($this->_writer->commit() as $segmentName => $segmentInfo) {
- if ($segmentInfo !== null) {
- $this->_segmentInfos[] = $segmentInfo;
- $this->_docCount += $segmentInfo->count();
- } else {
- foreach ($this->_segmentInfos as $segId => $segInfo) {
- if ($segInfo->getName() == $segmentName) {
- unset($this->_segmentInfos[$segId]);
- }
- }
- }
- }
- }
- }
-
-
- /*************************************************************************
- @todo UNIMPLEMENTED
- *************************************************************************/
-
- /**
- * Returns an array of all terms in this index.
- *
- * @todo Implementation
- * @return array
- */
- public function terms()
- {
- return array();
- }
-
-
- /**
- * Returns true if any documents have been deleted from this index.
- *
- * @todo Implementation
- * @return boolean
- */
- public function hasDeletions()
- {
- return false;
- }
-
-
- /**
- * Deletes a document from the index. $doc may contain a Zend_Search_Lucene_Document
- * or the number of the document to delete.
- *
- * @todo Implementation
- * @param mixed $item_to_del
- */
- public function delete($doc)
- {}
-
-
- /**
- * Undeletes all documents currently marked as deleted in this index.
- *
- * @todo Implementation
- */
- public function undeleteAll()
- {}
-} \ No newline at end of file