diff options
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php')
-rw-r--r-- | buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php | 412 |
1 files changed, 0 insertions, 412 deletions
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';


/**
 * Read-only view of a single index segment stored in a compound (.cfs) file.
 *
 * The constructor reads the compound file directory and the field infos;
 * the term dictionary index and the per-field norms are loaded lazily on
 * first use.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
class Zend_Search_Lucene_Index_SegmentInfo
{
    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Segment name
     *
     * @var string
     */
    private $_name;

    /**
     * Term Dictionary Index
     * Array of the Zend_Search_Lucene_Index_Term objects
     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
     *
     * Stays null until _loadDictionary() has been called.
     *
     * @var array|null
     */
    private $_termDictionary;

    /**
     * Term Dictionary Index TermInfos
     * Array of the Zend_Search_Lucene_Index_TermInfo objects
     *
     * @var array
     */
    private $_termDictionaryInfos;

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
     *
     * @var array
     */
    private $_fields;

    /**
     * Field positions in a dictionary.
     * (Term dictionary contains fields ordered by names)
     *
     * Maps field number => position of that field in the name-sorted order.
     *
     * @var array
     */
    private $_fieldsDicPositions;


    /**
     * Associative array where the key is the file name and the value is data offset
     * in a compound segment file (.cfs).
     *
     * @var array
     */
    private $_segFiles;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
     */
    private $_directory;

    /**
     * Normalization factors.
     * An array fieldNum => normVector
     * normVector is a binary string.
     * Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    private $_norms = array();

    /**
     * Zend_Search_Lucene_Index_SegmentInfo constructor needs segment name,
     * documents count and Directory as parameters.
     *
     * Reads the directory of the compound file "<name>.cfs" and then the
     * field infos stored in its ".fnm" entry.
     *
     * @param string $name
     * @param integer $docCount
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @throws Zend_Search_Lucene_Exception if the compound file has no .fnm entry
     */
    public function __construct($name, $docCount, $directory)
    {
        $this->_name           = $name;
        $this->_docCount       = $docCount;
        $this->_directory      = $directory;
        $this->_termDictionary = null;

        // Compound file directory: (data offset, file name) pairs.
        $this->_segFiles = array();
        $cfsFile         = $this->_directory->getFileObject($name . '.cfs');
        $segFilesCount   = $cfsFile->readVInt();

        for ($count = 0; $count < $segFilesCount; $count++) {
            $dataOffset = $cfsFile->readLong();
            $fileName   = $cfsFile->readString();
            $this->_segFiles[$fileName] = $dataOffset;
        }

        // Field infos: (name, flag byte) pairs; a field's number is its ordinal.
        $fnmFile       = $this->openCompoundFile('.fnm');
        $fieldsCount   = $fnmFile->readVInt();
        $fieldNames    = array();
        $fieldNums     = array();
        $this->_fields = array();

        for ($count = 0; $count < $fieldsCount; $count++) {
            $fieldName = $fnmFile->readString();
            $fieldBits = $fnmFile->readByte();

            // NOTE(review): bit 1 is read back as "isIndexed" elsewhere in this
            // class; the meaning of bit 2 is defined by FieldInfo - confirm there.
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                            $fieldBits & 1,
                                                                            $count,
                                                                            $fieldBits & 2);
            if ($fieldBits & 0x10) {
                // norms are omitted for the indexed field
                $this->_norms[$count] = str_repeat(
                    chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)),
                    $docCount
                );
            }

            $fieldNums[$count]  = $count;
            $fieldNames[$count] = $fieldName;
        }

        // Sort field numbers by field name, then invert so that
        // $_fieldsDicPositions[fieldNum] gives the field's rank in name order.
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
        $this->_fieldsDicPositions = array_flip($fieldNums);
    }

    /**
     * Opens index file stored within compound index file
     *
     * The returned object is the compound file itself, positioned at the
     * start of the requested entry.
     *
     * @param string $extension  Entry extension including the dot, e.g. '.fnm'
     * @throws Zend_Search_Lucene_Exception if the entry is not listed in the compound file
     * @return Zend_Search_Lucene_Storage_File
     */
    public function openCompoundFile($extension)
    {
        $filename = $this->_name . $extension;

        if (!isset($this->_segFiles[$filename])) {
            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                                 . $filename . ' file.');
        }

        $file = $this->_directory->getFileObject($this->_name . '.cfs');
        $file->seek($this->_segFiles[$filename]);

        return $file;
    }

    /**
     * Returns field index or -1 if field is not found
     *
     * Names are compared as exact strings; a loose comparison would treat
     * numeric-looking names such as '1000' and '1e3' as the same field.
     *
     * @param string $fieldName
     * @return integer
     */
    public function getFieldNum($fieldName)
    {
        $wanted = (string) $fieldName;

        foreach ($this->_fields as $field) {
            if ((string) $field->name === $wanted) {
                return $field->number;
            }
        }

        return -1;
    }

    /**
     * Returns field info for specified field
     *
     * @param integer $fieldNum
     * @return Zend_Search_Lucene_Index_FieldInfo|null  null if there is no such field
     */
    public function getField($fieldNum)
    {
        return isset($this->_fields[$fieldNum]) ? $this->_fields[$fieldNum] : null;
    }

    /**
     * Returns array of fields.
     * if $indexed parameter is true, then returns only indexed fields.
     *
     * @param boolean $indexed
     * @return array  Field names, keyed by field name
     */
    public function getFields($indexed = false)
    {
        $result = array();

        foreach ($this->_fields as $field) {
            if (!$indexed || $field->isIndexed) {
                $result[$field->name] = $field->name;
            }
        }

        return $result;
    }

    /**
     * Returns the total number of documents in this segment.
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }


    /**
     * Loads Term dictionary from TermInfoIndex file
     *
     * Does nothing when the dictionary has already been loaded.
     *
     * @throws Zend_Search_Lucene_Exception on an unexpected format marker
     */
    protected function _loadDictionary()
    {
        if ($this->_termDictionary !== null) {
            return;
        }

        $this->_termDictionary      = array();
        $this->_termDictionaryInfos = array();

        $tiiFile   = $this->openCompoundFile('.tii');
        $tiVersion = $tiiFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
        }

        $indexTermCount = $tiiFile->readLong();
        $tiiFile->readInt();  // IndexInterval
        $skipInterval   = $tiiFile->readInt();

        // Term text is prefix-compressed against the previous term and the
        // three pointers are stored as deltas, hence the running values.
        $prevTerm     = '';
        $freqPointer  = 0;
        $proxPointer  = 0;
        $indexPointer = 0;

        for ($count = 0; $count < $indexTermCount; $count++) {
            $termPrefixLength = $tiiFile->readVInt();
            $termSuffix       = $tiiFile->readString();
            $termValue        = substr($prevTerm, 0, $termPrefixLength) . $termSuffix;

            $termFieldNum = $tiiFile->readVInt();
            $docFreq      = $tiiFile->readVInt();
            $freqPointer += $tiiFile->readVInt();
            $proxPointer += $tiiFile->readVInt();

            // The skip delta is only read when docFreq >= skipInterval.
            if ($docFreq >= $skipInterval) {
                $skipDelta = $tiiFile->readVInt();
            } else {
                $skipDelta = 0;
            }

            $indexPointer += $tiiFile->readVInt();

            $this->_termDictionary[]      = new Zend_Search_Lucene_Index_Term($termValue, $termFieldNum);
            $this->_termDictionaryInfos[] =
                new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);

            $prevTerm = $termValue;
        }
    }


    /**
     * Return segment name
     *
     * @return string
     */
    public function getName()
    {
        return $this->_name;
    }


    /**
     * Scans terms dictionary and returns term info
     *
     * Binary-searches the in-memory dictionary index first; on a miss it
     * scans the ".tis" entry forward from the closest preceding index term.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @throws Zend_Search_Lucene_Exception on an unexpected format marker
     * @return Zend_Search_Lucene_Index_TermInfo|null  null if the term is not in this segment
     */
    public function getTermInfo($term)
    {
        $this->_loadDictionary();

        $searchField = $this->getFieldNum($term->field);

        if ($searchField == -1) {
            return null;
        }
        $searchDicField = $this->_fieldsDicPositions[$searchField];

        // search for appropriate value in dictionary
        $lowIndex  = 0;
        $highIndex = count($this->_termDictionary) - 1;
        while ($highIndex >= $lowIndex) {
            $mid     = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            // Order by field (in name-sorted order) first, then by term text.
            $delta = $searchDicField - $this->_fieldsDicPositions[$midTerm->field];
            if ($delta == 0) {
                $delta = strcmp($term->text, $midTerm->text);
            }

            if ($delta < 0) {
                $highIndex = $mid - 1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid + 1;
            } else {
                return $this->_termDictionaryInfos[$mid]; // We got it!
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            return null;
        }

        // $highIndex now points at the last index term preceding the target.
        $prevPosition = $highIndex;
        $prevTerm     = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        $tisFile   = $this->openCompoundFile('.tis');
        $tiVersion = $tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $termCount     = $tisFile->readLong();
        $indexInterval = $tisFile->readInt();
        $skipInterval  = $tisFile->readInt();

        // indexPointer is used as an offset from the start of the entry; the
        // seek is relative, so the header just consumed is subtracted.
        $tisFile->seek($prevTermInfo->indexPointer - 20 /* header size*/, SEEK_CUR);

        $termValue    = $prevTerm->text;
        $termFieldNum = $prevTerm->field;
        $freqPointer  = $prevTermInfo->freqPointer;
        $proxPointer  = $prevTermInfo->proxPointer;

        // Defined up front so they exist even if the scan reads no entry.
        $docFreq    = 0;
        $skipOffset = 0;

        // Read entries until one is >= the target in (field, text) order.
        for ($count = $prevPosition * $indexInterval + 1;
             $count < $termCount &&
             ( $this->_fieldsDicPositions[$termFieldNum] < $searchDicField ||
              ($this->_fieldsDicPositions[$termFieldNum] == $searchDicField &&
               strcmp($termValue, $term->text) < 0) );
             $count++) {
            $termPrefixLength = $tisFile->readVInt();
            $termSuffix       = $tisFile->readString();
            $termFieldNum     = $tisFile->readVInt();
            $termValue        = substr($termValue, 0, $termPrefixLength) . $termSuffix;

            $docFreq      = $tisFile->readVInt();
            $freqPointer += $tisFile->readVInt();
            $proxPointer += $tisFile->readVInt();
            if ($docFreq >= $skipInterval) {
                $skipOffset = $tisFile->readVInt();
            } else {
                $skipOffset = 0;
            }
        }

        // Exact byte-wise match, consistent with the strcmp() ordering above.
        // A loose "==" would treat numeric-looking terms such as '1000' and
        // '1e3' as equal and return the info of the wrong term.
        if ($termFieldNum == $searchField && strcmp($termValue, $term->text) === 0) {
            return new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
        }

        return null;
    }

    /**
     * Returns normalization factor for specified documents
     *
     * The norm vector of a field is read from the ".f<fieldNum>" entry on
     * first access and cached in $_norms.
     *
     * @param integer $id         Document number within this segment
     * @param string  $fieldName
     * @throws Zend_Search_Lucene_Exception if the norms entry is missing from the compound file
     * @return float|null  Decoded norm, or null if the field is unknown or not indexed
     */
    public function norm($id, $fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        // Unknown field: return early instead of dereferencing $_fields[-1].
        if ($fieldNum === -1 || !$this->_fields[$fieldNum]->isIndexed) {
            return null;
        }

        if (!isset($this->_norms[$fieldNum])) {
            $fFile = $this->openCompoundFile('.f' . $fieldNum);
            $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
        }

        // Square-bracket offset: the "{$id}" form is a parse error in PHP 8.
        return Zend_Search_Lucene_Search_Similarity::decodeNorm(ord($this->_norms[$fieldNum][$id]));
    }
}