From e2149e3d8b87b382563a9def6f376a5f289754d3 Mon Sep 17 00:00:00 2001
From: xue <>
Date: Fri, 12 May 2006 12:38:32 +0000
Subject: Merge from 3.0 branch till 1054.
---
buildscripts/index/api_index.php | 120 ++++++++++++++++++++++++++++++++
buildscripts/index/build.php | 65 +++++++++++++++++
buildscripts/index/quickstart_index.php | 109 +++++++++++++++++++++++++++++
3 files changed, 294 insertions(+)
create mode 100644 buildscripts/index/api_index.php
create mode 100644 buildscripts/index/build.php
create mode 100644 buildscripts/index/quickstart_index.php
(limited to 'buildscripts/index')
diff --git a/buildscripts/index/api_index.php b/buildscripts/index/api_index.php
new file mode 100644
index 00000000..ac2e37a7
--- /dev/null
+++ b/buildscripts/index/api_index.php
@@ -0,0 +1,120 @@
+_api = $api;
+ $this->_index = new Zend_Search_Lucene($index_file, true);
+
+
+ }
+
+ /**
+  * Builds the API search index.
+  *
+  * Every file returned by get_file_list() for the API directory becomes one
+  * index document; the index is committed once all documents are added.
+  * Progress messages are written to standard output.
+  */
+ function create_index()
+ {
+     echo "Building search index...\n";
+     $count = 0;
+     foreach($this->get_file_list($this->_api) as $page)
+     {
+         $details = $this->get_details($page, $this->_api);
+         $document = new Zend_Search_Lucene_Document();
+         $name = $details['namespace'].'.'.$details['class'];
+
+         echo " Adding ".$name."\n";
+
+         // Stored with the hit but not searchable. The page text itself is
+         // not stored in the index.
+         $document->addField(Zend_Search_Lucene_Field::UnIndexed('link', $details['link']));
+         $document->addField(Zend_Search_Lucene_Field::UnIndexed('title', $name));
+
+         // Searchable: lower-cased page text followed by the lower-cased name.
+         $searchable = strtolower($this->sanitize($details['content'])).' '.strtolower($name);
+         $keyword = strtolower(str_replace('.',' ',$name));
+         $document->addField(Zend_Search_Lucene_Field::Keyword('page', $keyword));
+         $document->addField(Zend_Search_Lucene_Field::Unstored('contents',$searchable));
+
+         $this->_index->addDocument($document);
+         $count++;
+     }
+     $this->_index->commit();
+     echo "\n {$count} files indexed.\n";
+ }
+
+ /**
+  * Strips markup from a string and entity-encodes the remainder.
+  *
+  * @param string $input text that may contain HTML/PHP tags
+  * @return string the tag-free text after htmlentities()
+  */
+ function sanitize($input)
+ {
+     $plain = strip_tags($input);
+     return htmlentities($plain);
+ }
+
+
+ /**
+  * Collects the files of every sub-directory of $path whose name contains
+  * the text 'System'.
+  *
+  * @param string $path directory to scan (one level deep)
+  * @return array file paths gathered through get_files() for each match
+  */
+ function get_file_list($path)
+ {
+     $collected = array();
+     $handle = dir($path);
+
+     while(($name = $handle->read()) !== false)
+     {
+         $candidate = $path.'/'.$name;
+
+         // Only directories with 'System' somewhere in their name qualify.
+         if(!is_dir($candidate) || strpos($name, 'System') === false)
+             continue;
+
+         $collected = array_merge($collected, $this->get_files($candidate));
+     }
+
+     $handle->close();
+     return $collected;
+ }
+
+ /**
+  * Lists the regular files directly inside a directory, skipping any whose
+  * name starts with an underscore.
+  *
+  * @param string $path directory to read (not recursive)
+  * @return array resolved (realpath) file names, in directory order
+  */
+ function get_files($path)
+ {
+     $d = dir($path);
+
+     $files = array();
+     while (false !== ($entry = $d->read()))
+     {
+         $filepath = $path.'/'.$entry;
+         if(is_file($filepath) && $entry[0] !== '_')
+             $files[] = realpath($filepath);
+     }
+
+     // Release the directory handle before returning; the close() call used
+     // to sit after the return statement and therefore never ran.
+     $d->close();
+     return $files;
+ }
+
+ function get_doc_content($file) // Reads the file at $file and returns its text after pattern clean-up and tag stripping.
+ {
+ $content = file_get_contents($file);
+ $html = preg_replace('/
/','~~~', $content); // as written: swaps each line break for the '~~~' marker. NOTE(review): the pattern looks truncated (a tag may have been lost from it) - confirm against the repository
+ $html = preg_replace('/[\s\w\W\S]+/m', '', $html); // NOTE(review): as written this class matches every character, so all text is removed here; the pattern has probably lost the markup that delimited it - confirm
+ $html = preg_replace('/ |~+|\s{2,}/',' ',$html); // single spaces, runs of '~' and whitespace runs each become one space. NOTE(review): first alternative may originally have been an entity - confirm
+ $html = preg_replace('/\s{2,}/',' ',$html); // collapse any whitespace runs left by the previous step
+ $text = strip_tags($html); // drop remaining markup
+ $text = str_replace(' , ',', ',$text); // tidy the ' , ' sequences into ', '
+ return $text;
+ }
+
+ /**
+  * Describes one API documentation file.
+  *
+  * The path is made relative by removing $base and the '.html' suffix and
+  * turning '-' into '.', then split on forward or back slashes; the
+  * second and third segments are taken as namespace and class.
+  *
+  * @param string $file path of the documentation file
+  * @param string $base prefix removed from $file before splitting
+  * @return array keys 'content', 'namespace', 'class' and 'link'
+  */
+ function get_details($file, $base)
+ {
+     $text = $this->get_doc_content($file);
+
+     $relative = str_replace(array($base, '.html', '-'), array('', '', '.'), $file);
+     $segments = preg_split('/\/|\\\/', $relative);
+
+     return array(
+         'content' => $text,
+         'namespace' => $segments[1],
+         'class' => $segments[2],
+         'link' => self::API_URL.$segments[1].'/'.$segments[2].'.html',
+     );
+ }
+}
+
+
+?>
\ No newline at end of file
diff --git a/buildscripts/index/build.php b/buildscripts/index/build.php
new file mode 100644
index 00000000..9ec0d659
--- /dev/null
+++ b/buildscripts/index/build.php
@@ -0,0 +1,65 @@
+create_index();
+ }
+ else if(strtolower($argv[1]) == "api")
+ {
+ $api = new api_index($api_target, $api_source);
+ $api->create_index();
+ }
+ else
+ {
+ $q = new Zend_Search_Lucene($quickstart_target);
+ $query = $argv[1];
+ $hits = $q->find(strtolower($query));
+ echo "Found ".count($hits)." for ".$query." in quick start\n";
+ foreach($hits as $hit)
+ echo " ".$hit->title."\n";
+
+ $a = new Zend_Search_Lucene($api_target);
+ $query = $argv[1];
+ $hits = $a->find(strtolower($query));
+ echo "\nFound ".count($hits)." for ".$query." in API\n";
+ foreach($hits as $hit)
+ {
+ echo " ".$hit->link."\n";
+ }
+ }
+}
+else
+{
+ echo "Usage: 'php build.php quickstart' or 'php build.php api'\n";
+}
+
+?>
\ No newline at end of file
diff --git a/buildscripts/index/quickstart_index.php b/buildscripts/index/quickstart_index.php
new file mode 100644
index 00000000..565734ef
--- /dev/null
+++ b/buildscripts/index/quickstart_index.php
@@ -0,0 +1,109 @@
+_index = new Zend_Search_Lucene($index_file, true);
+ $this->_dir = $index_file;
+ $this->_base = $base;
+ $this->_source = $source;
+ }
+
+ /**
+  * Builds the quickstart search index.
+  *
+  * The source file is included and expected to return chapters, each being
+  * a list of page paths relative to the base directory. Every page is read
+  * and handed to add() together with its modification time; the index is
+  * committed at the end. Progress messages are written to standard output.
+  */
+ public function create_index()
+ {
+     echo "Building search index...\n";
+     $chapters = include($this->_source);
+     $count = 0;
+     foreach($chapters as $sections)
+     {
+         foreach($sections as $section)
+         {
+             echo " Adding $section\n";
+             $path = $this->_base.'/'.$section;
+             $markup = file_get_contents($path);
+             $modified = filemtime($path);
+             $this->add($markup, $section, $modified);
+             $count++;
+         }
+     }
+
+     $this->_index->commit();
+     echo "\n {$count} files indexed.\n";
+ }
+
+ /**
+  * Adds one index document per heading found in a page.
+  *
+  * @param string $content raw page markup
+  * @param string $section page path; slashes become dots and '.page' is
+  *                        removed to form the link
+  * @param int $mtime modification time stored with each document
+  */
+ public function add($content, $section, $mtime)
+ {
+     // The part of the link before the anchor is the same for every heading.
+     $page = "index.php?page=".preg_replace('/\/|\\\/', '.', $section);
+     $page = str_replace('.page', '', $page);
+
+     foreach($this->split_headings($content) as $heading)
+     {
+         $title = $heading['title'];
+         $text = $heading['content'];
+         $document = new Zend_Search_Lucene_Document();
+
+         // Stored with the hit but not searchable.
+         $document->addField(Zend_Search_Lucene_Field::UnIndexed('link', $page.'#'.$heading['section']));
+         $document->addField(Zend_Search_Lucene_Field::UnIndexed('mtime', $mtime));
+         $document->addField(Zend_Search_Lucene_Field::UnIndexed('title', $title));
+         $document->addField(Zend_Search_Lucene_Field::UnIndexed('text', $text));
+
+         // Searchable: lower-cased title keyword, and the lower-cased text
+         // followed by the lower-cased title.
+         $document->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower($title)));
+         $searchable = strtolower($this->sanitize($text)).' '.strtolower($title);
+         $document->addField(Zend_Search_Lucene_Field::Unstored('contents',$searchable));
+
+         $this->_index->addDocument($document);
+     }
+ }
+
+ /**
+  * Strips markup from a string and entity-encodes the remainder.
+  *
+  * @param string $input text that may contain HTML/PHP tags
+  * @return string the tag-free text after htmlentities()
+  */
+ function sanitize($input)
+ {
+     $plain = strip_tags($input);
+     return htmlentities($plain);
+ }
+
+ /**
+  * Gives access to the underlying search index.
+  *
+  * @return Zend_Search_Lucene the index object created by the constructor
+  */
+ public function index()
+ {
+     $index = $this->_index;
+     return $index;
+ }
+
+ /**
+  * Splits a page into one entry per h1/h2/h3 heading.
+  *
+  * com:TContent tags are dropped and simple b/i/tt wrappers are unwrapped.
+  * Headings are then rewritten to a common <hh> marker so that a single
+  * pattern can both split the text and capture the heading details. Text
+  * that precedes the first heading is not returned.
+  *
+  * @param string $html raw page markup
+  * @return array list of arrays with keys 'title' (sanitized heading text),
+  *               'section' (first double-quoted value among the heading's
+  *               attributes, or '' when there is none) and 'content'
+  *               (sanitized text following the heading)
+  */
+ protected function split_headings($html)
+ {
+     $html = preg_replace('/<\/?com:TContent[^<]*>/', '', $html);
+
+     // Unwrap inline formatting that contains no nested tags.
+     foreach(array('b', 'i', 'tt') as $tag)
+         $html = preg_replace('/<'.$tag.'>([^<]*)<\/'.$tag.'>/', '$1', $html);
+
+     // Normalise h1..h3 to <hh ...>, keeping the attributes ($1) and text ($2).
+     // The split and match patterns below look for this marker, so the
+     // replacement has to produce it.
+     foreach(array('h1', 'h2', 'h3') as $tag)
+         $html = preg_replace('/<'.$tag.'([^>]*)>([^<]*)<\/'.$tag.'>/', '<hh$1>$2</hh>', $html);
+
+     $sections = preg_split('/<hh[^>]*>([^<]+)<\/hh>/', $html, -1);
+     $headers = array();
+     preg_match_all('/<hh([^>]*)>([^<]+)<\/hh>/', $html, $headers);
+
+     $contents = array();
+     // $sections[0] is the text before the first heading; section $i belongs
+     // to heading $i-1.
+     for($i = 1, $t = count($sections); $i < $t; $i++)
+     {
+         $content = array();
+         $content['title'] = trim($this->sanitize($headers[2][$i-1]));
+
+         // A heading without a quoted attribute simply gets an empty anchor
+         // instead of reading an undefined match offset.
+         $sec = array();
+         preg_match('/"([^"]*)"/', $headers[1][$i-1], $sec);
+         $content['section'] = isset($sec[1]) ? $sec[1] : '';
+
+         $content['content'] = trim($this->sanitize($sections[$i]));
+         $contents[] = $content;
+     }
+
+     return $contents;
+ }
+
+ /**
+  * Commits pending changes to the index and reports the index's count()
+  * together with the index location on standard output.
+  */
+ public function commit()
+ {
+     $index = $this->_index;
+     $index->commit();
+     $count = $index->count();
+     echo "\nSaving search index ({$count}) to {$this->_dir}\n\n";
+ }
+}
+?>
\ No newline at end of file
--
cgit v1.2.3