_fread(1));
    }

    /**
     * Writes a single byte via _fwrite().
     *
     * Only the low 8 bits are significant: chr() reduces the value
     * modulo 256.
     *
     * @param integer $byte
     * @return mixed whatever _fwrite() returns
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Reads $num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes $num bytes of $data via _fwrite(); $num is passed through
     * unchanged (documented as "all of $data" when it is null).
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num = null)
    {
        $this->_fwrite($data, $num);
    }

    /**
     * Reads a 4-byte big-endian integer from the current position
     * in the file and advances the file pointer.
     *
     * The bytes are combined without sign extension, so on builds with
     * 64-bit integers the result is always non-negative.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }

    /**
     * Writes an integer as 4 big-endian bytes.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');

        $this->_fwrite(
            chr($value >> 24 & 0xFF) .
            chr($value >> 16 & 0xFF) .
            chr($value >> 8  & 0xFF) .
            chr($value       & 0xFF),
            4
        );
    }

    /**
     * Reads an 8-byte big-endian long integer from the current position
     * in the file and advances the file pointer.
     *
     * Only the low 4 bytes contribute to the result (see below).
     *
     * @return integer
     */
    public function readLong()
    {
        $str = $this->_fread(8);

        /**
         * PHP uses long as its largest integer type and fseek() takes a
         * long offset. long is 4 bytes on a lot of systems, so the high
         * 4 bytes are discarded to prevent conversion to float.
         * As a result, the largest index segment file is 2Gb.
         */
        return  /* ord($str[0]) << 56 | */
                /* ord($str[1]) << 48 | */
                /* ord($str[2]) << 40 | */
                /* ord($str[3]) << 32 | */
                ord($str[4]) << 24 |
                ord($str[5]) << 16 |
                ord($str[6]) << 8  |
                ord($str[7]);
    }

    /**
     * Writes a long integer as 8 big-endian bytes.
     *
     * The high 4 bytes are always written as zero (see below).
     *
     * @param integer $value
     */
    public function writeLong($value)
    {
        /**
         * PHP uses long as its largest integer type and fseek() takes a
         * long offset. long is 4 bytes on a lot of systems, so the high
         * 4 bytes are discarded to prevent conversion to float.
         * As a result, the largest index segment file is 2Gb.
         */
        settype($value, 'integer');

        $this->_fwrite(
            "\x00\x00\x00\x00" .
            chr($value >> 24 & 0xFF) .
            chr($value >> 16 & 0xFF) .
            chr($value >> 8  & 0xFF) .
            chr($value       & 0xFF),
            8
        );
    }

    /**
     * Reads a variable-length integer from the current position
     * in the file and advances the file pointer.
     *
     * Each byte carries 7 payload bits, least-significant group first;
     * a set high bit (0x80) means another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val      = $nextByte & 0x7F;

        for ($shift = 7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val     |= ($nextByte & 0x7F) << $shift;
        }

        return $val;
    }

    /**
     * Writes a variable-length integer.
     *
     * Emits 7 bits per byte, least-significant group first, setting the
     * high bit (0x80) on every byte except the last.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');

        while ($value > 0x7F) {
            $this->_fwrite(chr(($value & 0x7F) | 0x80));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }

    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix is a character count, not a byte count,
     * so extra bytes are fetched whenever a multi-byte lead byte is met.
     * The two-byte sequence 0xC0 0x80 is decoded to a single NUL byte.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF).
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */
        $str_val = $this->_fread($strlen);

        // From here on $strlen tracks the expected byte length of $str_val.
        for ($count = 0; $count < $strlen; $count++) {
            $byte = ord($str_val[$count]);
            if (($byte & 0xC0) != 0xC0) {
                // Not a multi-byte lead byte: nothing more to fetch.
                continue;
            }

            $addBytes = 1;
            if ($byte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($byte & 0x10) {
                    $addBytes++;
                }
            }
            $str_val .= $this->_fread($addBytes);
            $strlen  += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes (0xC0 0x80).
            if ($byte == 0xC0 && ord($str_val[$count + 1]) == 0x80) {
                // Collapse the pair into one real NUL byte. The string is
                // now one byte shorter and the character occupies a single
                // byte at $count, so shrink $strlen and do not skip ahead.
                $str_val = substr($str_val, 0, $count) . "\x00" . substr($str_val, $count + 2);
                $strlen--;
            } else {
                // Skip the continuation bytes of this character.
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string: a VInt character count followed by the string
     * bytes, with every NUL byte encoded as the two bytes 0xC0 0x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception if the computed character
     *         count is negative (malformed multi-byte sequence)
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF).
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);
        $containNullChars = false;

        for ($count = 0; $count < $strlen; $count++) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $byte = ord($str[$count]);

            if (($byte & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($byte & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($byte & 0x10) {
                        $addBytes++;
                    }
                }
                // Continuation bytes do not count as characters.
                $chars -= $addBytes;
                $count += $addBytes;
            } elseif ($byte == 0) {
                // A NUL byte is never a lead byte, so it has to be detected
                // outside the multi-byte branch.
                $containNullChars = true;
            }
        }

        if ($chars < 0) {
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);
        if ($containNullChars) {
            // str_replace() takes (search, replace, subject).
            $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
        } else {
            $this->_fwrite($str);
        }
    }

    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is prefixed by its byte length stored as a VInt.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}