summary refs log tree commit diff
path: root/buildscripts/index/quickstart_index.php
diff options
context:
space:
mode:
author xue <> 2007-01-13 22:39:48 +0000
committer xue <> 2007-01-13 22:39:48 +0000
commit b2f63869d92ffc5ca1078ae5854e8de4f07a6927 (patch)
tree d7f62b079d59147a7f91ba014e3b2bf5b11d4cdf /buildscripts/index/quickstart_index.php
parent 31fae3d7bd911b16eac842189d88231b35ded16d (diff)
updated build script.
Diffstat (limited to 'buildscripts/index/quickstart_index.php')
-rw-r--r-- buildscripts/index/quickstart_index.php 48
1 files changed, 23 insertions, 25 deletions
diff --git a/buildscripts/index/quickstart_index.php b/buildscripts/index/quickstart_index.php
index 565734ef..fb4bc829 100644
--- a/buildscripts/index/quickstart_index.php
+++ b/buildscripts/index/quickstart_index.php
@@ -4,10 +4,10 @@ class quickstart_index
{
private $_index;
private $_dir;
-
+
private $_base;
private $_source;
-
+
public function __construct($index_file, $base, $source)
{
$this->_index = new Zend_Search_Lucene($index_file, true);
@@ -15,7 +15,7 @@ class quickstart_index
$this->_base = $base;
$this->_source = $source;
}
-
+
public function create_index()
{
echo "Building search index...\n";
@@ -30,13 +30,13 @@ class quickstart_index
$file_content = file_get_contents($page);
$this->add($file_content,$section, filemtime($page));
$count++;
- }
+ }
}
-
+
$this->_index->commit();
- echo "\n {$count} files indexed.\n";
+ echo "\n {$count} files indexed.\n";
}
-
+
public function add($content, $section, $mtime)
{
foreach($this->split_headings($content) as $headers)
@@ -44,44 +44,44 @@ class quickstart_index
$doc = new Zend_Search_Lucene_Document();
$link = "index.php?page=".preg_replace('/\/|\\\/', '.', $section);
$link = str_replace('.page', '', $link).'#'.$headers['section'];
-
+
//unsearchable text
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $link));
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('mtime', $mtime));
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('title', $headers['title']));
- $doc->addField(Zend_Search_Lucene_Field::UnIndexed('text', $headers['content']));
-
+ $doc->addField(Zend_Search_Lucene_Field::UnIndexed('text', $headers['content']));
+
//searchable text
$doc->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower($headers['title'])));
$body = strtolower($this->sanitize($headers['content'])).' '.strtolower($headers['title']);
$doc->addField(Zend_Search_Lucene_Field::Unstored('contents',$body));
$this->_index->addDocument($doc);
- }
+ }
}
-
- function sanitize($input)
+
+ function sanitize($input)
{
return htmlentities(strip_tags( $input ));
- }
-
+ }
+
public function index()
{
return $this->_index;
}
-
+
protected function split_headings($html)
{
$html = preg_replace('/<\/?com:TContent[^<]*>/', '', $html);
-
+
$html = preg_replace('/<b>([^<]*)<\/b>/', '$1', $html);
$html = preg_replace('/<i>([^<]*)<\/i>/', '$1', $html);
$html = preg_replace('/<tt>([^<]*)<\/tt>/', '$1', $html);
-
+
$html = preg_replace('/<h1([^>]*)>([^<]*)<\/h1>/', '<hh$1>$2</hh>', $html);
$html = preg_replace('/<h2([^>]*)>([^<]*)<\/h2>/', '<hh$1>$2</hh>', $html);
$html = preg_replace('/<h3([^>]*)>([^<]*)<\/h3>/', '<hh$1>$2</hh>', $html);
-
-
+
+
$sections = preg_split('/<hh[^>]*>([^<]+)<\/hh>/', $html,-1);
$headers = array();
preg_match_all('/<hh([^>]*)>([^<]+)<\/hh>/', $html, $headers);
@@ -89,19 +89,17 @@ class quickstart_index
for($i = 1, $t = count($sections); $i < $t; $i++)
{
$content['title'] = trim($this->sanitize($headers[2][$i-1]));
- $sec = array();
- preg_match('/"([^"]*)"/', $headers[1][$i-1], $sec);
- $content['section'] = str_replace('"', '',$sec[0]);
+ $content['section'] = str_replace('"', '',trim($headers[1][$i-1],'"'));
$content['content'] = trim($this->sanitize($sections[$i]));
$contents[] = $content;
}
return $contents;
}
-
+
public function commit()
{
- $this->_index->commit();
+ $this->_index->commit();
$count = $this->_index->count();
echo "\nSaving search index ({$count}) to {$this->_dir}\n\n";
}