summaryrefslogtreecommitdiff
path: root/buildscripts/index/quickstart_index.php
diff options
context:
space:
mode:
authorwei <>2006-05-11 11:35:35 +0000
committerwei <>2006-05-11 11:35:35 +0000
commitcc61c2d74e151a7fb75303f8740edfe74cdd9eb8 (patch)
tree3d57f3db231469caaadf42eabe095cc6f17b9950 /buildscripts/index/quickstart_index.php
parent90e706cda83d3b64a5508ed182956a1e3110ff26 (diff)
Update api docs and add API search
Diffstat (limited to 'buildscripts/index/quickstart_index.php')
-rw-r--r--buildscripts/index/quickstart_index.php109
1 files changed, 109 insertions, 0 deletions
diff --git a/buildscripts/index/quickstart_index.php b/buildscripts/index/quickstart_index.php
new file mode 100644
index 00000000..565734ef
--- /dev/null
+++ b/buildscripts/index/quickstart_index.php
@@ -0,0 +1,109 @@
+<?php
+
+class quickstart_index
+{
+ private $_index;
+ private $_dir;
+
+ private $_base;
+ private $_source;
+
+ public function __construct($index_file, $base, $source)
+ {
+ $this->_index = new Zend_Search_Lucene($index_file, true);
+ $this->_dir = $index_file;
+ $this->_base = $base;
+ $this->_source = $source;
+ }
+
+ public function create_index()
+ {
+ echo "Building search index...\n";
+ $pages = include($this->_source);
+ $count = 0;
+ foreach($pages as $chapter => $sections)
+ {
+ foreach($sections as $section)
+ {
+ echo " Adding $section\n";
+ $page = $this->_base.'/'.$section;
+ $file_content = file_get_contents($page);
+ $this->add($file_content,$section, filemtime($page));
+ $count++;
+ }
+ }
+
+ $this->_index->commit();
+ echo "\n {$count} files indexed.\n";
+ }
+
+ public function add($content, $section, $mtime)
+ {
+ foreach($this->split_headings($content) as $headers)
+ {
+ $doc = new Zend_Search_Lucene_Document();
+ $link = "index.php?page=".preg_replace('/\/|\\\/', '.', $section);
+ $link = str_replace('.page', '', $link).'#'.$headers['section'];
+
+ //unsearchable text
+ $doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $link));
+ $doc->addField(Zend_Search_Lucene_Field::UnIndexed('mtime', $mtime));
+ $doc->addField(Zend_Search_Lucene_Field::UnIndexed('title', $headers['title']));
+ $doc->addField(Zend_Search_Lucene_Field::UnIndexed('text', $headers['content']));
+
+ //searchable text
+ $doc->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower($headers['title'])));
+ $body = strtolower($this->sanitize($headers['content'])).' '.strtolower($headers['title']);
+ $doc->addField(Zend_Search_Lucene_Field::Unstored('contents',$body));
+ $this->_index->addDocument($doc);
+ }
+ }
+
+ function sanitize($input)
+ {
+ return htmlentities(strip_tags( $input ));
+ }
+
+ public function index()
+ {
+ return $this->_index;
+ }
+
+ protected function split_headings($html)
+ {
+ $html = preg_replace('/<\/?com:TContent[^<]*>/', '', $html);
+
+ $html = preg_replace('/<b>([^<]*)<\/b>/', '$1', $html);
+ $html = preg_replace('/<i>([^<]*)<\/i>/', '$1', $html);
+ $html = preg_replace('/<tt>([^<]*)<\/tt>/', '$1', $html);
+
+ $html = preg_replace('/<h1([^>]*)>([^<]*)<\/h1>/', '<hh$1>$2</hh>', $html);
+ $html = preg_replace('/<h2([^>]*)>([^<]*)<\/h2>/', '<hh$1>$2</hh>', $html);
+ $html = preg_replace('/<h3([^>]*)>([^<]*)<\/h3>/', '<hh$1>$2</hh>', $html);
+
+
+ $sections = preg_split('/<hh[^>]*>([^<]+)<\/hh>/', $html,-1);
+ $headers = array();
+ preg_match_all('/<hh([^>]*)>([^<]+)<\/hh>/', $html, $headers);
+ $contents = array();
+ for($i = 1, $t = count($sections); $i < $t; $i++)
+ {
+ $content['title'] = trim($this->sanitize($headers[2][$i-1]));
+ $sec = array();
+ preg_match('/"([^"]*)"/', $headers[1][$i-1], $sec);
+ $content['section'] = str_replace('"', '',$sec[0]);
+ $content['content'] = trim($this->sanitize($sections[$i]));
+ $contents[] = $content;
+ }
+
+ return $contents;
+ }
+
+ public function commit()
+ {
+ $this->_index->commit();
+ $count = $this->_index->count();
+ echo "\nSaving search index ({$count}) to {$this->_dir}\n\n";
+ }
+}
+?> \ No newline at end of file