From 903ae8a581fac1e6917fc3e31d2ad8fb91df80c3 Mon Sep 17 00:00:00 2001 From: ctrlaltca <> Date: Thu, 12 Jul 2012 11:21:01 +0000 Subject: standardize the use of unix eol; use svn properties to enforce native eol --- buildscripts/index/api_index.php | 240 +++++++++++++++++++-------------------- 1 file changed, 120 insertions(+), 120 deletions(-) (limited to 'buildscripts/index/api_index.php') diff --git a/buildscripts/index/api_index.php b/buildscripts/index/api_index.php index c85fa267..339cb042 100644 --- a/buildscripts/index/api_index.php +++ b/buildscripts/index/api_index.php @@ -1,121 +1,121 @@ -_api = $api; - $this->_index = new Zend_Search_Lucene($index_file, true); - - - } - - function create_index() - { - echo "Building search index...\n"; - $files = $this->get_file_list($this->_api); - $count = 0; - foreach($files as $file) - { - echo " processing $file...\n"; - $content = $this->get_details($file, $this->_api); - - $doc = new Zend_Search_Lucene_Document(); - - $title = $content['namespace'].'.'.$content['class']; - - echo " Adding ".$title."\n"; - - //unsearchable text - $doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $content['link'])); - $doc->addField(Zend_Search_Lucene_Field::UnIndexed('title', $title)); - //$doc->addField(Zend_Search_Lucene_Field::UnIndexed('text', $content['content'])); - - //searchable - $body = strtolower($this->sanitize($content['content'])).' '.strtolower($title); - $doc->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower(str_replace('.',' ',$title)))); - $doc->addField(Zend_Search_Lucene_Field::Unstored('contents',$body)); - $this->_index->addDocument($doc); - $count++; - } - $this->_index->commit(); - echo "\n {$count} files indexed.\n"; - } - - function sanitize($input) - { - return htmlentities(strip_tags( $input )); - } - - - function get_file_list($path) - { - - $d = dir($path); - - $files = array(); - while (false !== ($entry = $d->read())) - { - $filepath = $path.'/'.$entry; - - if(is_dir($filepath) && is_int(strpos($entry, 'System'))) - { - $files = array_merge($files, $this->get_files($filepath)); - } - } - - $d->close(); - return $files; - } - - function get_files($path) - { - $d = dir($path); - - $files = array(); - while (false !== ($entry = $d->read())) - { - $filepath = $path.'/'.$entry; - if(is_file($filepath) && $entry[0] !== '_') - $files[] = realpath($filepath); - } - $d->close(); - return $files; - } - - function get_doc_content($file) - { - $content = file_get_contents($file); - $html = preg_replace('/

/','~~~', $content); - $html = preg_replace('/[\s\w\W\S]+/m', '', $html); - $html = preg_replace('/ |~+|\s{2,}/',' ',$html); - $html = preg_replace('/\s{2,}/',' ',$html); - $text = strip_tags($html); - $text = str_replace(' , ',', ',$text); - return $text; - } - - function get_details($file, $base) - { - $result['content'] = $this->get_doc_content($file); - $find = array($base, '.html', '-'); - $replace = array('', '', '.'); - $path = preg_split('/\/|\\\/', str_replace($find, $replace, $file)); - $result['namespace'] = $path[1]; - $result['class'] = $path[2]; - $result['link'] = self::API_URL.$path[1].'/'.$path[2].'.html'; - return $result; - } -} - - +_api = $api; + $this->_index = new Zend_Search_Lucene($index_file, true); + + + } + + function create_index() + { + echo "Building search index...\n"; + $files = $this->get_file_list($this->_api); + $count = 0; + foreach($files as $file) + { + echo " processing $file...\n"; + $content = $this->get_details($file, $this->_api); + + $doc = new Zend_Search_Lucene_Document(); + + $title = $content['namespace'].'.'.$content['class']; + + echo " Adding ".$title."\n"; + + //unsearchable text + $doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $content['link'])); + $doc->addField(Zend_Search_Lucene_Field::UnIndexed('title', $title)); + //$doc->addField(Zend_Search_Lucene_Field::UnIndexed('text', $content['content'])); + + //searchable + $body = strtolower($this->sanitize($content['content'])).' '.strtolower($title); + $doc->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower(str_replace('.',' ',$title)))); + $doc->addField(Zend_Search_Lucene_Field::Unstored('contents',$body)); + $this->_index->addDocument($doc); + $count++; + } + $this->_index->commit(); + echo "\n {$count} files indexed.\n"; + } + + function sanitize($input) + { + return htmlentities(strip_tags( $input )); + } + + + function get_file_list($path) + { + + $d = dir($path); + + $files = array(); + while (false !== ($entry = $d->read())) + { + $filepath = $path.'/'.$entry; + + if(is_dir($filepath) && is_int(strpos($entry, 'System'))) + { + $files = array_merge($files, $this->get_files($filepath)); + } + } + + $d->close(); + return $files; + } + + function get_files($path) + { + $d = dir($path); + + $files = array(); + while (false !== ($entry = $d->read())) + { + $filepath = $path.'/'.$entry; + if(is_file($filepath) && $entry[0] !== '_') + $files[] = realpath($filepath); + } + $d->close(); + return $files; + } + + function get_doc_content($file) + { + $content = file_get_contents($file); + $html = preg_replace('/

/','~~~', $content); + $html = preg_replace('/[\s\w\W\S]+/m', '', $html); + $html = preg_replace('/ |~+|\s{2,}/',' ',$html); + $html = preg_replace('/\s{2,}/',' ',$html); + $text = strip_tags($html); + $text = str_replace(' , ',', ',$text); + return $text; + } + + function get_details($file, $base) + { + $result['content'] = $this->get_doc_content($file); + $find = array($base, '.html', '-'); + $replace = array('', '', '.'); + $path = preg_split('/\/|\\\/', str_replace($find, $replace, $file)); + $result['namespace'] = $path[1]; + $result['class'] = $path[2]; + $result['link'] = self::API_URL.$path[1].'/'.$path[2].'.html'; + return $result; + } +} + + ?> \ No newline at end of file -- cgit v1.2.3