diff options
Diffstat (limited to 'buildscripts/index')
| -rw-r--r-- | buildscripts/index/build.php | 4 | ||||
| -rw-r--r-- | buildscripts/index/quickstart_index.php | 48 | 
2 files changed, 25 insertions, 27 deletions
diff --git a/buildscripts/index/build.php b/buildscripts/index/build.php index 9ec0d659..0caaafb2 100644 --- a/buildscripts/index/build.php +++ b/buildscripts/index/build.php @@ -9,7 +9,7 @@  //quickstart source and the index data target directories.
 -$quickstart_source = realpath(dirname(__FILE__).'/../texbuilder/pages.php');
 +$quickstart_source = realpath(dirname(__FILE__).'/../texbuilder/quickstart/pages.php');
  $quickstart_base = realpath(dirname(__FILE__).'/../../demos/quickstart/protected/pages/');
  $quickstart_target = realpath(dirname(__FILE__).'/../../demos/quickstart/protected/index/quickstart/');
 @@ -46,7 +46,7 @@ if(isset($argv[1]))  		echo "Found ".count($hits)." for ".$query." in quick start\n";
  		foreach($hits as $hit)
  			echo "   ".$hit->title."\n";
 -			
 +
  		$a = new Zend_Search_Lucene($api_target);
  		$query = $argv[1];
  		$hits = $a->find(strtolower($query));
 diff --git a/buildscripts/index/quickstart_index.php b/buildscripts/index/quickstart_index.php index 565734ef..fb4bc829 100644 --- a/buildscripts/index/quickstart_index.php +++ b/buildscripts/index/quickstart_index.php @@ -4,10 +4,10 @@ class quickstart_index  {
  	private $_index;
  	private $_dir;
 -	
 +
  	private $_base;
  	private $_source;
 -	
 +
  	public function __construct($index_file, $base, $source)
  	{
  		$this->_index = new Zend_Search_Lucene($index_file, true);
 @@ -15,7 +15,7 @@ class quickstart_index  		$this->_base = $base;
  		$this->_source = $source;
  	}
 -	
 +
  	public function create_index()
  	{
  		echo "Building search index...\n";
 @@ -30,13 +30,13 @@ class quickstart_index  				$file_content = file_get_contents($page);
  				$this->add($file_content,$section, filemtime($page));
  				$count++;
 -			}		
 +			}
  		}
 -		
 +
  		$this->_index->commit();
 -		echo "\n {$count} files indexed.\n";		
 +		echo "\n {$count} files indexed.\n";
  	}
 -	
 +
  	public function add($content, $section, $mtime)
  	{
  		foreach($this->split_headings($content) as $headers)
 @@ -44,44 +44,44 @@ class quickstart_index  			$doc = new Zend_Search_Lucene_Document();
  			$link = "index.php?page=".preg_replace('/\/|\\\/', '.', $section);
  			$link = str_replace('.page', '', $link).'#'.$headers['section'];
 -			
 +
  			//unsearchable text
  			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $link));
  			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('mtime', $mtime));
  			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('title', $headers['title']));
 -			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('text', $headers['content']));		
 -			
 +			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('text', $headers['content']));
 +
  			//searchable text
  			$doc->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower($headers['title'])));
  			$body = strtolower($this->sanitize($headers['content'])).' '.strtolower($headers['title']);
  			$doc->addField(Zend_Search_Lucene_Field::Unstored('contents',$body));
  			$this->_index->addDocument($doc);
 -		}		
 +		}
  	}
 -	
 -	function sanitize($input) 
 +
 +	function sanitize($input)
  	{
  		return htmlentities(strip_tags( $input ));
 -	}	
 -	
 +	}
 +
  	public function index()
  	{
  		return $this->_index;
  	}
 -	
 +
  	protected function split_headings($html)
  	{
  		$html = preg_replace('/<\/?com:TContent[^<]*>/', '', $html);
 -		
 +
  		$html = preg_replace('/<b>([^<]*)<\/b>/', '$1', $html);
  		$html = preg_replace('/<i>([^<]*)<\/i>/', '$1', $html);
  		$html = preg_replace('/<tt>([^<]*)<\/tt>/', '$1', $html);
 -		
 +
  		$html = preg_replace('/<h1([^>]*)>([^<]*)<\/h1>/', '<hh$1>$2</hh>', $html);
  		$html = preg_replace('/<h2([^>]*)>([^<]*)<\/h2>/', '<hh$1>$2</hh>', $html);
  		$html = preg_replace('/<h3([^>]*)>([^<]*)<\/h3>/', '<hh$1>$2</hh>', $html);
 -		
 -		
 +
 +
  		$sections = preg_split('/<hh[^>]*>([^<]+)<\/hh>/', $html,-1);
  		$headers = array();
  		preg_match_all('/<hh([^>]*)>([^<]+)<\/hh>/', $html, $headers);
 @@ -89,19 +89,17 @@ class quickstart_index  		for($i = 1, $t = count($sections); $i < $t; $i++)
  		{
  			$content['title'] = trim($this->sanitize($headers[2][$i-1]));
 -			$sec = array();
 -			preg_match('/"([^"]*)"/', $headers[1][$i-1], $sec);
 -			$content['section'] = str_replace('"', '',$sec[0]);
 +			$content['section'] = str_replace('"', '',trim($headers[1][$i-1],'"'));
  			$content['content'] = trim($this->sanitize($sections[$i]));
  			$contents[] = $content;
  		}
  		return $contents;
  	}
 -	
 +
  	public function commit()
  	{
 -		$this->_index->commit();		
 +		$this->_index->commit();
  		$count = $this->_index->count();
  		echo "\nSaving search index ({$count}) to {$this->_dir}\n\n";
  	}
  | 
