diff options
Diffstat (limited to 'buildscripts/texbuilder/Zend/Search/Lucene/Storage/File.php')
-rw-r--r-- | buildscripts/texbuilder/Zend/Search/Lucene/Storage/File.php | 376 |
1 files changed, 376 insertions, 0 deletions
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


/**
 * Abstract byte-oriented file used by the Lucene index reader/writer.
 *
 * Concrete subclasses supply the raw primitives (_fread(), _fwrite(),
 * seek(), tell()); this class layers the fixed-width, variable-length
 * and string encodings on top of them.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Class constructor. Open the file.
     *
     * @param string $filename
     * @param string $mode
     */
    abstract public function __construct($filename, $mode='r');


    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);


    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);


    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * The result of _fwrite() is passed through unchanged.
     *
     * @param integer $byte
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }


    /**
     * Reads a 4-byte big-endian integer from the current position in the
     * file and advances the file pointer.
     *
     * The bytes are combined without sign extension.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer to the end of file as 4 big-endian bytes.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');
        $this->_fwrite( chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   4  );
    }


    /**
     * Returns a long integer from the current position in the file
     * and advances the file pointer.
     *
     * Eight bytes are consumed, but only the low four are decoded.
     *
     * @return integer
     */
    public function readLong()
    {
        $str = $this->_fread(8);

        /**
         * PHP uses long as largest integer. fseek() uses long for offset.
         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
         * conversion to float.
         * So, largest index segment file is 2Gb
         */
        return  ord($str[4]) << 24 |
                ord($str[5]) << 16 |
                ord($str[6]) << 8  |
                ord($str[7]);
    }

    /**
     * Writes long integer to the end of file.
     *
     * Eight bytes are written; the high four are always zero.
     *
     * @param integer $value
     */
    public function writeLong($value)
    {
        /**
         * PHP uses long as largest integer. fseek() uses long for offset.
         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
         * conversion to float.
         * So, largest index segment file is 2Gb
         */
        settype($value, 'integer');
        $this->_fwrite( "\x00\x00\x00\x00"     .
                        chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   8  );
    }


    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte carries 7 payload bits, least-significant group first;
     * the high bit of a byte signals that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');
        while ($value > 0x7F) {
            // Low 7 bits plus the continuation flag.
            $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix is a character count, not a byte count, so
     * the extra bytes of every multi-byte character are fetched while the
     * data is scanned. The two-byte sequence "\xC0\x80" (Java's encoding of
     * the null character) is returned as a single "\x00" byte.
     *
     * @return string
     */
    public function readString()
    {
        $charCount = $this->readVInt();
        if ($charCount == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // Start with one byte per character; grow as lead bytes are found.
        $bytes      = $this->_fread($charCount);
        $byteLength = $charCount;

        for ($pos = 0; $pos < $byteLength; $pos++) {
            $lead = ord($bytes[$pos]);

            if (($lead & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($lead & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    if ($lead & 0x10) {
                        $addBytes++;
                    }
                }

                $bytes      .= $this->_fread($addBytes);
                $byteLength += $addBytes;

                // Skip the continuation bytes of this character.
                $pos += $addBytes;
            }
        }

        // Java2 encodes the null character in two bytes. Decoding it once,
        // after the scan, keeps $pos and $byteLength consistent with the
        // bytes actually read from the file.
        return str_replace("\xC0\x80", "\x00", $bytes);
    }

    /**
     * Writes a string to the end of file.
     *
     * A VInt holding the number of characters is written first, followed
     * by the string bytes with every "\x00" replaced by "\xC0\x80".
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $byteLength = strlen($str);
        $chars      = $byteLength;

        /**
         * String is already in Java 2 representation.
         * We should only calculate actual string length and replace
         * \x00 by \xC0\x80
         */
        for ($pos = 0; $pos < $byteLength; $pos++) {
            $lead = ord($str[$pos]);

            if (($lead & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($lead & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($lead & 0x10) {
                        $addBytes++;
                    }
                }

                // Continuation bytes do not count as characters.
                $chars -= $addBytes;
                $pos   += $addBytes;
            }
        }

        if ($chars < 0) {
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // str_replace() takes (search, replace, subject). A null byte is one
        // character both before and after the replacement, so $chars stays
        // valid. The call is a no-op when the string holds no null bytes.
        $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is prefixed by its byte length stored as a VInt.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}
\ No newline at end of file |