summaryrefslogtreecommitdiff
path: root/buildscripts/texbuilder/Zend/Search/Lucene/Index
diff options
context:
space:
mode:
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Index')
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Index/FieldInfo.php43
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php412
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentWriter.php491
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Index/Term.php70
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Index/TermInfo.php77
-rw-r--r--buildscripts/texbuilder/Zend/Search/Lucene/Index/Writer.php308
6 files changed, 0 insertions, 1401 deletions
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/FieldInfo.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/FieldInfo.php
deleted file mode 100644
index eaca4ecf..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/FieldInfo.php
+++ /dev/null
@@ -1,43 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/**
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-class Zend_Search_Lucene_Index_FieldInfo
-{
-    /**
-     * Field name
-     *
-     * @var string
-     */
-    public $name;
-
-    /**
-     * Truthy when the field is indexed
-     *
-     * @var boolean|integer
-     */
-    public $isIndexed;
-
-    /**
-     * Field number within its segment
-     *
-     * @var integer
-     */
-    public $number;
-
-    /**
-     * Truthy when term vectors are stored for the field
-     *
-     * @var boolean|integer
-     */
-    public $storeTermVector;
-
-    /**
-     * Plain value holder: every argument is copied to the property of the same name.
-     *
-     * @param string          $name
-     * @param boolean|integer $isIndexed
-     * @param integer         $number
-     * @param boolean|integer $storeTermVector
-     */
-    public function __construct( $name, $isIndexed, $number, $storeTermVector )
-    {
-        $this->number          = $number;
-        $this->name            = $name;
-        $this->storeTermVector = $storeTermVector;
-        $this->isIndexed       = $isIndexed;
-    }
-}
-
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php
deleted file mode 100644
index f5c596a0..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentInfo.php
+++ /dev/null
@@ -1,412 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
-
-
-/**
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-class Zend_Search_Lucene_Index_SegmentInfo
-{
- /**
- * Number of docs in a segment
- *
- * @var integer
- */
- private $_docCount;
-
- /**
- * Segment name
- *
- * @var string
- */
- private $_name;
-
- /**
- * Term Dictionary Index
- * Array of the Zend_Search_Lucene_Index_Term objects
- * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
- *
- * @var array
- */
- private $_termDictionary;
-
- /**
- * Term Dictionary Index TermInfos
- * Array of the Zend_Search_Lucene_Index_TermInfo objects
- *
- * @var array
- */
- private $_termDictionaryInfos;
-
- /**
- * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
- *
- * @var array
- */
- private $_fields;
-
- /**
- * Field positions in a dictionary.
- * (Term dictionary contains fields ordered by names)
- *
- * @var array
- */
- private $_fieldsDicPositions;
-
-
- /**
- * Associative array where the key is the file name and the value is data offset
- * in a compound segment file (.cfs).
- *
- * @var array
- */
- private $_segFiles;
-
- /**
- * File system adapter.
- *
- * @var Zend_Search_Lucene_Storage_Directory_Filesystem
- */
- private $_directory;
-
- /**
- * Normalization factors.
- * An array fieldNum => normVector
- * normVector is a binary string.
- * Each byte corresponds to an indexed document in a segment and
- * encodes normalization factor (float value, encoded by
- * Zend_Search_Lucene_Search_Similarity::encodeNorm())
- *
- * @var array
- */
- private $_norms = array();
-
- /**
- * Opens an existing segment: reads the directory of the compound file
- * (<name>.cfs) and the field infos (.fnm) stored inside it.
- *
- * @param string $name Segment name (base name of the segment files)
- * @param integer $docCount Number of documents in the segment
- * @param Zend_Search_Lucene_Storage_Directory $directory Directory the segment is read from
- */
- public function __construct($name, $docCount, $directory)
- {
- $this->_name = $name;
- $this->_docCount = $docCount;
- $this->_directory = $directory;
- // null marks the term dictionary index as "not loaded yet" (see _loadDictionary())
- $this->_termDictionary = null;
-
- // Compound file directory: entry count followed by (data offset, file name) pairs
- $this->_segFiles = array();
- $cfsFile = $this->_directory->getFileObject($name . '.cfs');
- $segFilesCount = $cfsFile->readVInt();
-
- for ($count = 0; $count < $segFilesCount; $count++) {
- $dataOffset = $cfsFile->readLong();
- $fileName = $cfsFile->readString();
- $this->_segFiles[$fileName] = $dataOffset;
- }
-
- // Field infos: field count followed by (field name, flag byte) pairs.
- // The position of a field in this file is used as its field number.
- $fnmFile = $this->openCompoundFile('.fnm');
- $fieldsCount = $fnmFile->readVInt();
- $fieldNames = array();
- $fieldNums = array();
- $this->_fields = array();
- for ($count=0; $count < $fieldsCount; $count++) {
- $fieldName = $fnmFile->readString();
- $fieldBits = $fnmFile->readByte();
- // Flag bits: 0x01 - field is indexed, 0x02 - term vector is stored
- $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
- $fieldBits & 1,
- $count,
- $fieldBits & 2 );
- if ($fieldBits & 0x10) {
- // norms are omitted for the indexed field
- // so every document gets the encoded norm of 1.0
- $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
- }
-
- $fieldNums[$count] = $count;
- $fieldNames[$count] = $fieldName;
- }
- // Sort the field names and reorder the field numbers along with them;
- // flipping the result maps field number => position in name order.
- array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
- $this->_fieldsDicPositions = array_flip($fieldNums);
- }
-
- /**
-  * Opens an index file stored within the compound index file.
-  *
-  * The returned file object is the compound file (<segment name>.cfs)
-  * positioned at the start of the requested member's data.
-  *
-  * @param string $extension Member extension including the dot, e.g. '.fnm'
-  * @throws Zend_Search_Lucene_Exception if the compound file has no such member
-  * @return Zend_Search_Lucene_Storage_File
-  */
- public function openCompoundFile($extension)
- {
-     $memberName = $this->_name . $extension;
-
-     if (isset($this->_segFiles[$memberName])) {
-         $compound = $this->_directory->getFileObject($this->_name . '.cfs');
-         $compound->seek($this->_segFiles[$memberName]);
-
-         return $compound;
-     }
-
-     throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
-                                          . $memberName . ' file.');
- }
-
- /**
-  * Looks up the number of the field with the given name.
-  *
-  * @param string $fieldName
-  * @return integer Field number, or -1 if the segment has no such field
-  */
- public function getFieldNum($fieldName)
- {
-     foreach ($this->_fields as $info) {
-         if ($info->name != $fieldName) {
-             continue;
-         }
-
-         return $info->number;
-     }
-
-     return -1;
- }
-
- /**
- * Returns field info for specified field
- *
- * No bounds check is made: $fieldNum has to be a number of an existing field.
- *
- * @param integer $fieldNum
- * @return Zend_Search_Lucene_Index_FieldInfo
- */
- public function getField($fieldNum)
- {
- return $this->_fields[$fieldNum];
- }
-
- /**
-  * Returns the names of the segment fields.
-  * The result is keyed by field name and each value repeats its key.
-  *
-  * @param boolean $indexed If true, only indexed fields are returned
-  * @return array
-  */
- public function getFields($indexed = false)
- {
-     $names = array();
-
-     foreach ($this->_fields as $info) {
-         if ($indexed && !$info->isIndexed) {
-             // Only indexed fields were requested
-             continue;
-         }
-
-         $names[$info->name] = $info->name;
-     }
-
-     return $names;
- }
-
- /**
- * Returns the total number of documents in this segment
- * (the document count passed to the constructor).
- *
- * @return integer
- */
- public function count()
- {
- return $this->_docCount;
- }
-
-
- /**
- * Loads Term dictionary index from TermInfoIndex (.tii) file.
- *
- * Fills $_termDictionary and $_termDictionaryInfos with one entry per index
- * term. Does nothing if the dictionary index has already been loaded.
- *
- * @throws Zend_Search_Lucene_Exception if the file has an unexpected format version
- */
- protected function _loadDictionary()
- {
- if ($this->_termDictionary !== null) {
- return;
- }
-
- $this->_termDictionary = array();
- $this->_termDictionaryInfos = array();
-
- // File header: format version, index term count, index interval, skip interval
- $tiiFile = $this->openCompoundFile('.tii');
- $tiVersion = $tiiFile->readInt();
- if ($tiVersion != (int)0xFFFFFFFE) {
- throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
- }
-
- $indexTermCount = $tiiFile->readLong();
- $tiiFile->readInt(); // IndexInterval
- $skipInterval = $tiiFile->readInt();
-
- // Term text is prefix-compressed against the previous term and
- // the pointers are stored as deltas, so running values are kept.
- $prevTerm = '';
- $freqPointer = 0;
- $proxPointer = 0;
- $indexPointer = 0;
- for ($count = 0; $count < $indexTermCount; $count++) {
- $termPrefixLength = $tiiFile->readVInt();
- $termSuffix = $tiiFile->readString();
- $termValue = substr( $prevTerm, 0, $termPrefixLength ) . $termSuffix;
-
- $termFieldNum = $tiiFile->readVInt();
- $docFreq = $tiiFile->readVInt();
- $freqPointer += $tiiFile->readVInt();
- $proxPointer += $tiiFile->readVInt();
- // Skip delta is present only for terms with docFreq >= skipInterval
- if( $docFreq >= $skipInterval ) {
- $skipDelta = $tiiFile->readVInt();
- } else {
- $skipDelta = 0;
- }
-
- $indexPointer += $tiiFile->readVInt();
-
- // Terms loaded here carry the field number (not the field name) in their field property
- $this->_termDictionary[] = new Zend_Search_Lucene_Index_Term($termValue,$termFieldNum);
- $this->_termDictionaryInfos[] =
- new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);
- $prevTerm = $termValue;
- }
- }
-
-
- /**
- * Return segment name (the name passed to the constructor)
- *
- * @return string
- */
- public function getName()
- {
- return $this->_name;
- }
-
-
- /**
-  * Scans terms dictionary and returns term info
-  *
-  * The in-memory dictionary index (loaded from the .tii file) is searched
-  * first. When the term is not an index entry itself, the .tis file is
-  * scanned sequentially, starting from the closest preceding index entry.
-  *
-  * @param Zend_Search_Lucene_Index_Term $term Term to look up; $term->field holds the field name
-  * @throws Zend_Search_Lucene_Exception if the .tis file has an unexpected format version
-  * @return Zend_Search_Lucene_Index_TermInfo|null Term info, or null if the field or the term is not found
-  */
- public function getTermInfo($term)
- {
-     $this->_loadDictionary();
-
-     $searchField = $this->getFieldNum($term->field);
-
-     if ($searchField == -1) {
-         return null;
-     }
-     $searchDicField = $this->_fieldsDicPositions[$searchField];
-
-     // Binary search in the dictionary index. Entries are compared by the
-     // dictionary position of their field first and by term text second.
-     $lowIndex  = 0;
-     $highIndex = count($this->_termDictionary) - 1;
-     while ($highIndex >= $lowIndex) {
-         $mid     = ($highIndex + $lowIndex) >> 1;
-         $midTerm = $this->_termDictionary[$mid];
-
-         $delta = $searchDicField - $this->_fieldsDicPositions[$midTerm->field];
-         if ($delta == 0) {
-             $delta = strcmp($term->text, $midTerm->text);
-         }
-
-         if ($delta < 0) {
-             $highIndex = $mid - 1;
-         } elseif ($delta > 0) {
-             $lowIndex = $mid + 1;
-         } else {
-             return $this->_termDictionaryInfos[$mid]; // We got it!
-         }
-     }
-
-     if ($highIndex == -1) {
-         // Term is out of the dictionary range
-         return null;
-     }
-
-     // $highIndex now points to the last index entry that precedes the term
-     $prevPosition = $highIndex;
-     $prevTerm     = $this->_termDictionary[$prevPosition];
-     $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
-
-     $tisFile   = $this->openCompoundFile('.tis');
-     $tiVersion = $tisFile->readInt();
-     if ($tiVersion != (int)0xFFFFFFFE) {
-         throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
-     }
-
-     $termCount     = $tisFile->readLong();
-     $indexInterval = $tisFile->readInt();
-     $skipInterval  = $tisFile->readInt();
-
-     // The header has just been consumed, so it is subtracted from the relative seek
-     $tisFile->seek($prevTermInfo->indexPointer - 20 /* header size*/, SEEK_CUR);
-
-     // Running values start from the index entry. $docFreq and $skipOffset are
-     // initialised as well, so they are defined even if the scan loop below
-     // does not execute a single iteration.
-     $termValue    = $prevTerm->text;
-     $termFieldNum = $prevTerm->field;
-     $freqPointer  = $prevTermInfo->freqPointer;
-     $proxPointer  = $prevTermInfo->proxPointer;
-     $docFreq      = $prevTermInfo->docFreq;
-     $skipOffset   = $prevTermInfo->skipOffset;
-     for ($count = $prevPosition*$indexInterval + 1;
-          $count < $termCount &&
-          ( $this->_fieldsDicPositions[ $termFieldNum ] < $searchDicField ||
-           ($this->_fieldsDicPositions[ $termFieldNum ] == $searchDicField &&
-            strcmp($termValue, $term->text) < 0) );
-          $count++) {
-         $termPrefixLength = $tisFile->readVInt();
-         $termSuffix       = $tisFile->readString();
-         $termFieldNum     = $tisFile->readVInt();
-         $termValue        = substr( $termValue, 0, $termPrefixLength ) . $termSuffix;
-
-         $docFreq      = $tisFile->readVInt();
-         $freqPointer += $tisFile->readVInt();
-         $proxPointer += $tisFile->readVInt();
-         if( $docFreq >= $skipInterval ) {
-             $skipOffset = $tisFile->readVInt();
-         } else {
-             $skipOffset = 0;
-         }
-     }
-
-     // Term text is compared with strcmp(), exactly like everywhere else in
-     // this method. A loose "==" treats numeric-looking strings such as
-     // "1e3" and "1000" as equal and could report a match for a different term.
-     if ($termFieldNum == $searchField && strcmp($termValue, $term->text) === 0) {
-         return new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
-     } else {
-         return null;
-     }
- }
-
- /**
-  * Returns normalization factor of a field for the specified document
-  *
-  * The norm vector of a field is read from the '.f<fieldNum>' member of the
-  * compound file on first use and cached in $_norms afterwards.
-  *
-  * @param integer $id Document number within the segment
-  * @param string $fieldName
-  * @throws Zend_Search_Lucene_Exception if the norms file is missing from the compound file
-  * @return float|null Value returned by Zend_Search_Lucene_Search_Similarity::decodeNorm()
-  *                    (presumably a float - confirm against that class),
-  *                    or null if the field is unknown or is not indexed
-  */
- public function norm($id, $fieldName)
- {
-     $fieldNum = $this->getFieldNum($fieldName);
-
-     // getFieldNum() reports an unknown field as -1, which is not a valid
-     // $_fields offset; such a field has no norms either.
-     if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
-         return null;
-     }
-
-     if ( !isset( $this->_norms[$fieldNum] )) {
-         $fFile = $this->openCompoundFile('.f' . $fieldNum);
-         $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
-     }
-
-     // One byte per document. Square brackets are used for the string offset:
-     // the curly brace form is deprecated since PHP 7.4 and removed in PHP 8.0.
-     return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
- }
-}
-
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentWriter.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentWriter.php
deleted file mode 100644
index f90d6ed3..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/SegmentWriter.php
+++ /dev/null
@@ -1,491 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/** Zend_Search_Lucene_Exception */
-require_once 'Zend/Search/Lucene/Exception.php';
-
-/** Zend_Search_Lucene_Analysis_Analyzer */
-require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
-
-/** Zend_Search_Lucene_Index_SegmentInfo */
-require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
-
-
-/**
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-class Zend_Search_Lucene_Index_SegmentWriter
-{
- /**
- * Expert: The fraction of terms in the "dictionary" which should be stored
- * in RAM. Smaller values use more memory, but make searching slightly
- * faster, while larger values use less memory and make searching slightly
- * slower. Searching is typically not dominated by dictionary lookup, so
- * tweaking this is rarely useful.
- *
- * @var integer
- */
- static public $indexInterval = 128;
-
- /** Expert: The fraction of TermDocs entries stored in skip tables.
- * Larger values result in smaller indexes, greater acceleration, but fewer
- * accelerable cases, while smaller values result in bigger indexes,
- * less acceleration and more
- * accelerable cases. More detailed experiments would be useful here.
- *
- * 0x7FFFFFFF indicates that we don't use skip data
- * Default value is 16
- *
- * @var integer
- */
- static public $skipInterval = 0x7FFFFFFF;
-
- /**
- * Number of docs in a segment
- *
- * @var integer
- */
- private $_docCount;
-
- /**
- * Segment name
- *
- * @var string
- */
- private $_name;
-
- /**
- * File system adapter.
- *
- * @var Zend_Search_Lucene_Storage_Directory
- */
- private $_directory;
-
- /**
- * List of the index files.
- * Used for automatic compound file generation
- *
- * @var array
- */
- private $_files;
-
- /**
- * Term Dictionary
- * Array of the Zend_Search_Lucene_Index_Term objects
- * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
- *
- * @var array
- */
- private $_termDictionary;
-
- /**
- * Documents, which contain the term
- *
- * @var array
- */
- private $_termDocs;
-
- /**
- * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
- *
- * @var array
- */
- private $_fields;
-
- /**
- * Normalization factors.
- * An array fieldName => normVector
- * normVector is a binary string.
- * Each byte corresponds to an indexed document in a segment and
- * encodes normalization factor (float value, encoded by
- * Zend_Search_Lucene_Search_Similarity::encodeNorm())
- *
- * @var array
- */
- private $_norms;
-
-
- /**
- * '.fdx' file - Stored Fields, the field index.
- *
- * @var Zend_Search_Lucene_Storage_File
- */
- private $_fdxFile;
-
- /**
- * '.fdt' file - Stored Fields, the field data.
- *
- * @var Zend_Search_Lucene_Storage_File
- */
- private $_fdtFile;
-
-
- /**
-  * Object constructor.
-  *
-  * Creates an empty in-memory segment. No files are created here.
-  *
-  * @param Zend_Search_Lucene_Storage_Directory $directory Directory the segment files are written to
-  * @param string $name Segment name (base name of the segment files)
-  */
- public function __construct($directory, $name)
- {
-     $this->_directory = $directory;
-     $this->_name      = $name;
-     $this->_docCount  = 0;
-
-     $this->_fields    = array();
-     $this->_termDocs  = array();
-     $this->_files     = array();
-     $this->_norms     = array();
-
-     // Must be an array even if no indexed term is ever added:
-     // _dumpDictionary() passes it to count() and array_keys().
-     $this->_termDictionary = array();
-
-     // Stored fields files are created when the first stored field is added
-     $this->_fdxFile = null;
-     $this->_fdtFile = null;
- }
-
-
- /**
-  * Registers a field in the segment.
-  *
-  * A field name seen for the first time gets the next free field number.
-  * For an already known name the "indexed" and "store term vector" flags
-  * are OR-ed with the flags of the given field.
-  *
-  * @param Zend_Search_Lucene_Field $field
-  */
- private function _addFieldInfo(Zend_Search_Lucene_Field $field)
- {
-     if (isset($this->_fields[$field->name])) {
-         $this->_fields[$field->name]->isIndexed       |= $field->isIndexed;
-         $this->_fields[$field->name]->storeTermVector |= $field->storeTermVector;
-
-         return;
-     }
-
-     // The current field count is the number of the new field
-     $nextNumber = count($this->_fields);
-
-     $this->_fields[$field->name] = new Zend_Search_Lucene_Index_FieldInfo(
-         $field->name,
-         $field->isIndexed,
-         $nextNumber,
-         $field->storeTermVector
-     );
- }
-
-
- /**
-  * Adds a document to this segment.
-  *
-  * Indexed fields are tokenized (or taken as a single token) and their term
-  * positions are collected in memory; stored fields are appended to the
-  * .fdx/.fdt files, which are created on first use.
-  *
-  * @param Zend_Search_Lucene_Document $document
-  * @throws Zend_Search_Lucene_Exception if a field requests term vector storing
-  */
- public function addDocument(Zend_Search_Lucene_Document $document)
- {
-     $storedFields = array();
-
-     foreach ($document->getFieldNames() as $fieldName) {
-         $field = $document->getField($fieldName);
-         $this->_addFieldInfo($field);
-
-         if ($field->storeTermVector) {
-             /**
-              * @todo term vector storing support
-              */
-             throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
-         }
-
-         if ($field->isIndexed) {
-             if ($field->isTokenized) {
-                 $tokenList = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue);
-             } else {
-                 // The whole value is a single token
-                 $tokenList   = array();
-                 $tokenList[] = new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue));
-             }
-
-             $position = 0;
-             foreach ($tokenList as $token) {
-                 $term    = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
-                 $termKey = $term->key();
-
-                 if (!isset($this->_termDictionary[$termKey])) {
-                     // New term
-                     $this->_termDictionary[$termKey] = $term;
-                     $this->_termDocs[$termKey]       = array();
-                 }
-                 if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
-                     // First occurrence of the term in this document
-                     $this->_termDocs[$termKey][$this->_docCount] = array();
-                 }
-                 $position += $token->getPositionIncrement();
-                 $this->_termDocs[$termKey][$this->_docCount][] = $position;
-             }
-         }
-
-         if ($field->isStored) {
-             $storedFields[] = $field;
-         }
-     }
-
-     if (count($storedFields) != 0) {
-         if (!isset($this->_fdxFile)) {
-             $this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx');
-             $this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt');
-
-             $this->_files[] = $this->_name . '.fdx';
-             $this->_files[] = $this->_name . '.fdt';
-         }
-
-         // The field index holds the offset of the document's data in the .fdt file
-         $this->_fdxFile->writeLong($this->_fdtFile->tell());
-
-         $this->_fdtFile->writeVInt(count($storedFields));
-         foreach ($storedFields as $field) {
-             $this->_fdtFile->writeVInt($this->_fields[$field->name]->number);
-
-             // Each flag is parenthesised on purpose: "?:" binds looser than "|",
-             // so without the parentheses the expression is not the OR of the flags.
-             $this->_fdtFile->writeByte(($field->isTokenized ? 0x01 : 0x00) |
-                                        ($field->isBinary    ? 0x02 : 0x00) |
-                                        0x00 /* 0x04 - third bit, compressed (ZLIB) */ );
-             if ($field->isBinary) {
-                 $this->_fdtFile->writeVInt(strlen($field->stringValue));
-                 $this->_fdtFile->writeBytes($field->stringValue);
-             } else {
-                 $this->_fdtFile->writeString($field->stringValue);
-             }
-         }
-     }
-
-     $this->_docCount++;
- }
-
-
- /**
-  * Dump Field Info (.fnm) segment file
-  *
-  * Writes the field count followed by a (name, flag byte) pair for each
-  * field, and registers the file for compound file generation.
-  */
- private function _dumpFNM()
- {
-     $fnmName = $this->_name . '.fnm';
-
-     $fnmFile = $this->_directory->createFile($fnmName);
-     $fnmFile->writeVInt(count($this->_fields));
-
-     foreach ($this->_fields as $info) {
-         // 0x10 - norms are omitted for the indexed field (always set).
-         // 0x04 (term positions are stored with the term vectors) and
-         // 0x08 (term offsets are stored with the term vectors) are not supported yet.
-         $flags = 0x10;
-         if ($info->isIndexed) {
-             $flags |= 0x01;
-         }
-         if ($info->storeTermVector) {
-             $flags |= 0x02;
-         }
-
-         $fnmFile->writeString($info->name);
-         $fnmFile->writeByte($flags);
-     }
-
-     $this->_files[] = $fnmName;
- }
-
-
- /**
-  * Dump Term Dictionary segment file entry.
-  * Used to write entry to .tis or .tii files
-  *
-  * Term text is written as the length of the prefix shared with the previous
-  * term of the same field plus the remaining suffix; the freq/prox pointers
-  * are written as deltas against the previous entry. $prevTerm and
-  * $prevTermInfo are passed by reference and are replaced with $term and
-  * $termInfo, so they are ready for the next call.
-  *
-  * @param Zend_Search_Lucene_Storage_File $dicFile
-  * @param Zend_Search_Lucene_Index_Term|null $prevTerm
-  * @param Zend_Search_Lucene_Index_Term $term
-  * @param Zend_Search_Lucene_Index_TermInfo|null $prevTermInfo
-  * @param Zend_Search_Lucene_Index_TermInfo $termInfo
-  */
- private function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
-                                     &$prevTerm,     Zend_Search_Lucene_Index_Term     $term,
-                                     &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
- {
-     if (isset($prevTerm) && $prevTerm->field == $term->field) {
-         // Square brackets are used for the string offsets: the curly brace
-         // form is deprecated since PHP 7.4 and removed in PHP 8.0.
-         $prefixLength = 0;
-         while ($prefixLength < strlen($prevTerm->text) &&
-                $prefixLength < strlen($term->text) &&
-                $prevTerm->text[$prefixLength] == $term->text[$prefixLength]
-               ) {
-             $prefixLength++;
-         }
-         // Write prefix length
-         $dicFile->writeVInt($prefixLength);
-         // Write suffix
-         $dicFile->writeString( substr($term->text, $prefixLength) );
-     } else {
-         // First entry or first term of a field: nothing is shared
-         // Write prefix length
-         $dicFile->writeVInt(0);
-         // Write suffix
-         $dicFile->writeString($term->text);
-     }
-     // Write field number
-     $dicFile->writeVInt($term->field);
-     // DocFreq (the count of documents which contain the term)
-     $dicFile->writeVInt($termInfo->docFreq);
-
-     $prevTerm = $term;
-
-     if (!isset($prevTermInfo)) {
-         // Write FreqDelta
-         $dicFile->writeVInt($termInfo->freqPointer);
-         // Write ProxDelta
-         $dicFile->writeVInt($termInfo->proxPointer);
-     } else {
-         // Write FreqDelta
-         $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
-         // Write ProxDelta
-         $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
-     }
-     // Write SkipOffset - it's not 0 when $termInfo->docFreq > self::$skipInterval
-     if ($termInfo->skipOffset != 0) {
-         $dicFile->writeVInt($termInfo->skipOffset);
-     }
-
-     $prevTermInfo = $termInfo;
- }
-
- /**
- * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
- *
- * Also writes the frequencies (.frq) and positions (.prx) files and
- * registers all four files for compound file generation.
- */
- private function _dumpDictionary()
- {
- // Both dictionary files start with the same header layout:
- // format version, term count, index interval, skip interval
- $tisFile = $this->_directory->createFile($this->_name . '.tis');
- $tisFile->writeInt((int)0xFFFFFFFE);
- $tisFile->writeLong(count($this->_termDictionary));
- $tisFile->writeInt(self::$indexInterval);
- $tisFile->writeInt(self::$skipInterval);
-
- // NOTE(review): for an empty dictionary the count expression below still
- // yields 1 although no index entry is written - confirm whether a segment
- // without indexed terms can reach this point.
- $tiiFile = $this->_directory->createFile($this->_name . '.tii');
- $tiiFile->writeInt((int)0xFFFFFFFE);
- $tiiFile->writeLong((int)((count($this->_termDictionary) - 1)/self::$indexInterval) + 1);
- $tiiFile->writeInt(self::$indexInterval);
- $tiiFile->writeInt(self::$skipInterval);
-
- $frqFile = $this->_directory->createFile($this->_name . '.frq');
- $prxFile = $this->_directory->createFile($this->_name . '.prx');
-
- // Term keys are "<field name>\0<term text>" (see Zend_Search_Lucene_Index_Term::key()),
- // so a string sort orders the terms by field name first and by text second
- $termKeys = array_keys($this->_termDictionary);
- sort($termKeys, SORT_STRING);
-
- $termCount = 0;
-
- // Previous entries are tracked separately for the .tis and the .tii file
- $prevTerm = null;
- $prevTermInfo = null;
- $prevIndexTerm = null;
- $prevIndexTermInfo = null;
- $prevIndexPosition = 0;
-
- foreach ($termKeys as $termId) {
- $freqPointer = $frqFile->tell();
- $proxPointer = $prxFile->tell();
-
- $prevDoc = 0;
- foreach ($this->_termDocs[$termId] as $docId => $termPositions) {
- // Doc delta is doubled; the low bit is set when the term has a single
- // position in the document, otherwise the position count follows
- $docDelta = ($docId - $prevDoc)*2;
- $prevDoc = $docId;
- if (count($termPositions) > 1) {
- $frqFile->writeVInt($docDelta);
- $frqFile->writeVInt(count($termPositions));
- } else {
- $frqFile->writeVInt($docDelta + 1);
- }
-
- // Positions are written as deltas within the document
- $prevPosition = 0;
- foreach ($termPositions as $position) {
- $prxFile->writeVInt($position - $prevPosition);
- $prevPosition = $position;
- }
- }
-
- if (count($this->_termDocs[$termId]) >= self::$skipInterval) {
- /**
- * @todo Write Skip Data to a freq file.
- * It's not used now, but must be implemented to be compatible with Lucene
- */
- $skipOffset = $frqFile->tell() - $freqPointer;
- } else {
- $skipOffset = 0;
- }
-
- // In-memory terms carry the field name; the dictionary files store the field number
- $term = new Zend_Search_Lucene_Index_Term($this->_termDictionary[$termId]->text,
- $this->_fields[$this->_termDictionary[$termId]->field]->number);
- $termInfo = new Zend_Search_Lucene_Index_TermInfo(count($this->_termDocs[$termId]),
- $freqPointer, $proxPointer, $skipOffset);
-
- $this->_dumpTermDictEntry($tisFile, $prevTerm, $term, $prevTermInfo, $termInfo);
-
- // Every indexInterval-th term (starting with the first one) also goes to the index
- if ($termCount % self::$indexInterval == 0) {
- $this->_dumpTermDictEntry($tiiFile, $prevIndexTerm, $term, $prevIndexTermInfo, $termInfo);
-
- // Index pointer: .tis position right after this term's entry, stored as a delta
- $indexPosition = $tisFile->tell();
- $tiiFile->writeVInt($indexPosition - $prevIndexPosition);
- $prevIndexPosition = $indexPosition;
- }
- $termCount++;
- }
-
- $this->_files[] = $this->_name . '.tis';
- $this->_files[] = $this->_name . '.tii';
- $this->_files[] = $this->_name . '.frq';
- $this->_files[] = $this->_name . '.prx';
- }
-
-
- /**
- * Generate compound index file
- *
- * Packs every file registered in $_files into <segment name>.cfs and
- * deletes the separate files afterwards.
- */
- private function _generateCFS()
- {
- $cfsFile = $this->_directory->createFile($this->_name . '.cfs');
- $cfsFile->writeVInt(count($this->_files));
-
- // First pass: write the directory with placeholder offsets and
- // remember where each offset has to be patched in
- $dataOffsetPointers = array();
- foreach ($this->_files as $fileName) {
- $dataOffsetPointers[$fileName] = $cfsFile->tell();
- $cfsFile->writeLong(0); // write dummy data
- $cfsFile->writeString($fileName);
- }
-
- // Second pass: patch the real offset of each file, then append its data
- foreach ($this->_files as $fileName) {
- // Get actual data offset
- $dataOffset = $cfsFile->tell();
- // Seek to the data offset pointer
- $cfsFile->seek($dataOffsetPointers[$fileName]);
- // Write actual data offset value
- $cfsFile->writeLong($dataOffset);
- // Seek back to the end of file
- $cfsFile->seek($dataOffset);
-
- // The whole file content is read with a single readBytes() call
- $dataFile = $this->_directory->getFileObject($fileName);
- $cfsFile->writeBytes($dataFile->readBytes($this->_directory->fileLength($fileName)));
-
- $this->_directory->deleteFile($fileName);
- }
- }
-
-
- /**
-  * Close segment, write it to disk and return segment info
-  *
-  * Nothing is written for a segment without documents.
-  *
-  * @return Zend_Search_Lucene_Index_SegmentInfo|null Info of the written segment,
-  *                                                   or null if no document was added
-  */
- public function close()
- {
-     if ($this->_docCount != 0) {
-         $this->_dumpFNM();
-         $this->_dumpDictionary();
-
-         // Must run last: it packs the files registered by the dump methods
-         $this->_generateCFS();
-
-         return new Zend_Search_Lucene_Index_SegmentInfo($this->_name,
-                                                         $this->_docCount,
-                                                         $this->_directory);
-     }
-
-     return null;
- }
-
-}
-
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/Term.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/Term.php
deleted file mode 100644
index e30ce587..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/Term.php
+++ /dev/null
@@ -1,70 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/**
- * A Term represents a word from text. This is the unit of search. It is
- * composed of two elements, the text of the word, as a string, and the name of
- * the field that the text occured in, an interned string.
- *
- * Note that terms may represent more than words from text fields, but also
- * things like dates, email addresses, urls, etc.
- *
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-class Zend_Search_Lucene_Index_Term
-{
-    /**
-     * Field name or field number (depending on context)
-     *
-     * @var mixed
-     */
-    public $field;
-
-    /**
-     * Term value
-     *
-     * @var string
-     */
-    public $text;
-
-
-    /**
-     * Creates a term.
-     *
-     * @param string $text  Term value
-     * @param mixed  $field Field name or field number; defaults to the 'contents' field
-     */
-    public function __construct( $text, $field = 'contents' )
-    {
-        $this->text  = $text;
-        $this->field = $field;
-    }
-
-
-    /**
-     * Returns a string key of the term: the field and the text joined by a NUL byte.
-     *
-     * @return string
-     */
-    public function key()
-    {
-        return $this->field . "\0" . $this->text;
-    }
-}
-
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/TermInfo.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/TermInfo.php
deleted file mode 100644
index ddef721d..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/TermInfo.php
+++ /dev/null
@@ -1,77 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/**
- * A Zend_Search_Lucene_Index_TermInfo represents a record of information stored for a term.
- *
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-class Zend_Search_Lucene_Index_TermInfo
-{
-    /**
-     * The number of documents which contain the term.
-     *
-     * @var integer
-     */
-    public $docFreq;
-
-    /**
-     * Data offset in a Frequencies file.
-     *
-     * @var integer
-     */
-    public $freqPointer;
-
-    /**
-     * Data offset in a Positions file.
-     *
-     * @var integer
-     */
-    public $proxPointer;
-
-    /**
-     * SkipData offset in a Frequencies file.
-     *
-     * @var integer
-     */
-    public $skipOffset;
-
-    /**
-     * Term offset of the _next_ term in a TermDictionary file.
-     * Used only for Term Index
-     *
-     * @var integer
-     */
-    public $indexPointer;
-
-    /**
-     * Plain value holder: every argument is copied to the property of the same name.
-     *
-     * @param integer      $docFreq
-     * @param integer      $freqPointer
-     * @param integer      $proxPointer
-     * @param integer      $skipOffset
-     * @param integer|null $indexPointer Left as null when not given
-     */
-    public function __construct($docFreq, $freqPointer, $proxPointer, $skipOffset, $indexPointer = null)
-    {
-        $this->indexPointer = $indexPointer;
-        $this->skipOffset   = $skipOffset;
-        $this->proxPointer  = $proxPointer;
-        $this->freqPointer  = $freqPointer;
-        $this->docFreq      = $docFreq;
-    }
-}
-
diff --git a/buildscripts/texbuilder/Zend/Search/Lucene/Index/Writer.php b/buildscripts/texbuilder/Zend/Search/Lucene/Index/Writer.php
deleted file mode 100644
index da4af000..00000000
--- a/buildscripts/texbuilder/Zend/Search/Lucene/Index/Writer.php
+++ /dev/null
@@ -1,308 +0,0 @@
-<?php
-/**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to version 1.0 of the Zend Framework
- * license, that is bundled with this package in the file LICENSE, and
- * is available through the world-wide-web at the following URL:
- * http://www.zend.com/license/framework/1_0.txt. If you did not receive
- * a copy of the Zend Framework license and are unable to obtain it
- * through the world-wide-web, please send a note to license@zend.com
- * so we can mail you a copy immediately.
- *
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-
-
-/** Zend_Search_Lucene_Index_SegmentWriter */
-require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
-
-/** Zend_Search_Lucene_Index_SegmentInfo */
-require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
-
-
-/**
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
- */
-class Zend_Search_Lucene_Index_Writer
-{
- /**
- * @todo Implement segment merger
- * @todo Implement mergeFactor, minMergeDocs, maxMergeDocs usage.
- * @todo Implement Analyzer substitution
- * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
- * temporary index files
- * @todo Directory lock processing
- */
-
- /**
- * File system adapter.
- *
- * @var Zend_Search_Lucene_Storage_Directory
- */
- private $_directory = null;
-
-
- /**
- * Index version
- * Counts how often the index has been changed by adding or deleting docs
- *
- * @var integer
- */
- private $_version;
-
- /**
- * Segment name counter.
- * Used to name new segments.
- *
- * @var integer
- */
- private $_segmentNameCounter;
-
- /**
- * Number of segments in the index
- *
- * @var integer
- */
- private $_segments;
-
- /**
- * Determines how often segment indices
- * are merged by addDocument().
- *
- * @var integer
- */
- public $mergeFactor;
-
- /**
- * Determines the minimal number of documents required before
- * the buffered in-memory documents are merged and a new Segment
- * is created.
- *
- * @var integer
- */
- public $minMergeDocs;
-
- /**
- * Determines the largest number of documents ever merged by addDocument().
- *
- * @var integer
- */
- public $maxMergeDocs;
-
- /**
- * List of the segments, created by index writer
- * Array of Zend_Search_Lucene_Index_SegmentInfo objects
- *
- * @var array
- */
- private $_newSegments;
-
- /**
- * Current segment to which new documents are added
- *
- * @var Zend_Search_Lucene_Index_SegmentWriter
- */
- private $_currentSegment;
-
- /**
- * Opens the index for writing
- *
- * The constructor requires a Zend_Search_Lucene_Storage_Directory object
- * for the index folder (the parameter is type-hinted, so a plain path
- * string is not accepted). The optional second parameter $create, when
- * true, creates the index or overwrites the existing one.
- *
- * @param Zend_Search_Lucene_Storage_Directory $directory
- * @param boolean $create
- */
- public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $create = false)
- {
- $this->_directory = $directory;
-
- if ($create) {
- foreach ($this->_directory->fileList() as $file) {
- if ($file == 'deletable' ||
- $file == 'segments' ||
- substr($file, strlen($file)-4) == '.cfs') {
- $this->_directory->deleteFile($file);
- }
- }
- $segmentsFile = $this->_directory->createFile('segments');
- $segmentsFile->writeInt((int)0xFFFFFFFF);
- // write version
- $segmentsFile->writeLong(0);
- // write name counter
- $segmentsFile->writeInt(0);
- // write segment counter
- $segmentsFile->writeInt(0);
-
- $deletableFile = $this->_directory->createFile('deletable');
- // write counter
- $deletableFile->writeInt(0);
-
- $this->_version = 0;
- $this->_segmentNameCounter = 0;
- $this->_segments = 0;
- } else {
- $segmentsFile = $this->_directory->getFileObject('segments');
- $format = $segmentsFile->readInt();
- if ($format != (int)0xFFFFFFFF) {
- throw new Zend_Search_Lucene_Exception('Wrong segments file format');
- }
-
- // read version
- $this->_version = $segmentsFile->readLong();
- // read counter
- $this->_segmentNameCounter = $segmentsFile->readInt();
- // read segment counter
- $this->_segments = $segmentsFile->readInt();
- }
-
- $this->_newSegments = array();
- $this->_currentSegment = null;
- }
-
- /**
- * Adds a document to this index.
- *
- * @param Zend_Search_Lucene_Document $document
- */
- public function addDocument(Zend_Search_Lucene_Document $document)
- {
- if ($this->_currentSegment === null) {
- $this->_currentSegment =
- new Zend_Search_Lucene_Index_SegmentWriter($this->_directory, $this->_newSegmentName());
- }
- $this->_currentSegment->addDocument($document);
- $this->_version++;
- }
-
-
-
- /**
- * Update segments file by adding current segment to a list
- * @todo !!!!!Finish the implementation
- *
- * @throws Zend_Search_Lucene_Exception
- */
- private function _updateSegments()
- {
- $segmentsFile = $this->_directory->getFileObject('segments');
- $newSegmentFile = $this->_directory->createFile('segments.new');
-
- $newSegmentFile->writeInt((int)0xFFFFFFFF);
- $newSegmentFile->writeLong($this->_version);
- $newSegmentFile->writeInt($this->_segmentNameCounter);
- $newSegmentFile->writeInt($this->_segments + count($this->_newSegments));
-
- $segmentsFile->seek(20);
- $newSegmentFile->writeBytes($segmentsFile->readBytes($this->_directory->fileLength('segments') - 20));
-
- foreach ($this->_newSegments as $segmentName => $segmentInfo) {
- $newSegmentFile->writeString($segmentName);
- $newSegmentFile->writeInt($segmentInfo->count());
- }
-
- $this->_directory->renameFile('segments.new', 'segments');
- }
-
-
- /**
- * Commits the current changes.
- * Returns an array of the new segments.
- *
- * @return array
- */
- public function commit()
- {
- if ($this->_currentSegment !== null) {
- $newSegment = $this->_currentSegment->close();
- if ($newSegment !== null) {
- $this->_newSegments[$newSegment->getName()] = $newSegment;
- }
- $this->_currentSegment = null;
- }
-
- if (count($this->_newSegments) != 0) {
- $this->_updateSegments();
- }
-
- $result = $this->_newSegments;
- $this->_newSegments = array();
-
- return $result;
- }
-
-
- /**
- * Merges the provided indexes into this index.
- *
- * @param array $readers
- * @return void
- */
- public function addIndexes($readers)
- {
- /**
- * @todo implementation
- */
- }
-
-
- /**
- * Returns the number of documents currently in this index.
- *
- * @return integer
- */
- public function docCount($readers)
- {
- /**
- * @todo implementation
- */
- }
-
-
- /**
- * Flushes all changes to an index and closes all associated files.
- *
- */
- public function close()
- {
- /**
- * @todo implementation
- */
- }
-
-
- /**
- * Merges all segments together into a single segment, optimizing
- * an index for search.
- *
- * @return void
- */
- public function optimize()
- {
- /**
- * @todo implementation
- */
- }
-
- /**
- * Get name for new segment
- *
- * @return string
- */
- private function _newSegmentName()
- {
- return '_' . base_convert($this->_segmentNameCounter++, 10, 36);
- }
-
-}