diff options
Diffstat (limited to 'buildscripts/index')
| -rw-r--r-- | buildscripts/index/api_index.php | 120 | ||||
| -rw-r--r-- | buildscripts/index/build.php | 65 | ||||
| -rw-r--r-- | buildscripts/index/quickstart_index.php | 109 | 
3 files changed, 294 insertions, 0 deletions
diff --git a/buildscripts/index/api_index.php b/buildscripts/index/api_index.php new file mode 100644 index 00000000..ac2e37a7 --- /dev/null +++ b/buildscripts/index/api_index.php @@ -0,0 +1,120 @@ +<?php
 +/*
 + * Created on 10/05/2006
 + */
 + 
 +/**
 + * Builds a Zend_Search_Lucene index over the generated API manual pages.
 + *
 + * Only sub-directories of the API directory whose name contains "System"
 + * are scanned, and every file in them whose name does not start with "_"
 + * becomes one document in the index.
 + */
 +class api_index
 +{
 +	/** Prefix prepended to every document link stored in the index. */
 +	const API_URL = '';
 +
 +	/** Zend_Search_Lucene instance that receives the documents. */
 +	private $_index;
 +
 +	/** Directory that holds the generated API pages. */
 +	private $_api;
 +
 +	/**
 +	 * @param string $index_file location passed to Zend_Search_Lucene.
 +	 * @param string $api directory that holds the generated API pages.
 +	 */
 +	public function __construct($index_file, $api)
 +	{
 +		$this->_api = $api;
 +		//second argument is Zend_Search_Lucene's "create" flag.
 +		$this->_index = new Zend_Search_Lucene($index_file, true);
 +	}
 +
 +	/**
 +	 * Adds one document per API page to the index and commits it,
 +	 * echoing progress and the final document count.
 +	 */
 +	public function create_index()
 +	{
 +		echo "Building search index...\n";
 +		$count = 0;
 +		foreach($this->get_file_list($this->_api) as $file)
 +		{
 +			$content = $this->get_details($file, $this->_api);
 +			$title = $content['namespace'].'.'.$content['class'];
 +
 +			echo "  Adding ".$title."\n";
 +
 +			$doc = new Zend_Search_Lucene_Document();
 +
 +			//unsearchable text
 +			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $content['link']));
 +			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('title', $title));
 +
 +			//searchable; the title is appended so a class name also matches its own page.
 +			$body = strtolower($this->sanitize($content['content'])).' '.strtolower($title);
 +			$doc->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower(str_replace('.',' ',$title))));
 +			$doc->addField(Zend_Search_Lucene_Field::Unstored('contents',$body));
 +
 +			$this->_index->addDocument($doc);
 +			$count++;
 +		}
 +		$this->_index->commit();
 +		echo "\n {$count} files indexed.\n";
 +	}
 +
 +	/**
 +	 * Strips tags from the input and encodes the remainder as HTML entities.
 +	 *
 +	 * @param string $input raw text.
 +	 * @return string tag-free, entity-encoded text.
 +	 */
 +	public function sanitize($input)
 +	{
 +		return htmlentities(strip_tags($input));
 +	}
 +
 +	/**
 +	 * Collects the files of every direct sub-directory of $path whose name
 +	 * contains "System".
 +	 *
 +	 * @param string $path directory to scan.
 +	 * @return array absolute file paths; empty when $path cannot be opened.
 +	 */
 +	public function get_file_list($path)
 +	{
 +		$files = array();
 +		$d = dir($path);
 +		if(!$d)
 +			return $files;
 +
 +		while (false !== ($entry = $d->read()))
 +		{
 +			$filepath = $path.'/'.$entry;
 +			if(is_dir($filepath) && strpos($entry, 'System') !== false)
 +				$files = array_merge($files, $this->get_files($filepath));
 +		}
 +
 +		$d->close();
 +		return $files;
 +	}
 +
 +	/**
 +	 * Lists the regular files directly inside $path, skipping those whose
 +	 * name starts with an underscore.
 +	 *
 +	 * @param string $path directory to scan.
 +	 * @return array absolute file paths; empty when $path cannot be opened.
 +	 */
 +	public function get_files($path)
 +	{
 +		$files = array();
 +		$d = dir($path);
 +		if(!$d)
 +			return $files;
 +
 +		while (false !== ($entry = $d->read()))
 +		{
 +			$filepath = $path.'/'.$entry;
 +			if(is_file($filepath) && $entry[0] !== '_')
 +				$files[] = realpath($filepath);
 +		}
 +
 +		//release the directory handle before returning.
 +		$d->close();
 +		return $files;
 +	}
 +
 +	/**
 +	 * Reduces an API page to plain text for indexing.
 +	 *
 +	 * @param string $file path of the page to read.
 +	 * @return string page text without markup.
 +	 */
 +	public function get_doc_content($file)
 +	{
 +		$content = file_get_contents($file);
 +		//mark each <h1> with "~~~" so the next pattern knows where to stop.
 +		$html = preg_replace('/<h1>/','~~~', $content);
 +		//drop everything from a "<!" up to the next "~" marker.
 +		$html = preg_replace('/<![^~]+/m', '', $html);
 +		//drop the credit block and everything after it.
 +		$html = preg_replace('/<div class="credit">[\s\w\W\S]+/m', '', $html);
 +		$html = preg_replace('/ |~+|\s{2,}/',' ',$html);
 +		//the previous pass can leave adjacent spaces behind; collapse them.
 +		$html = preg_replace('/\s{2,}/',' ',$html);
 +		$text = strip_tags($html);
 +		$text = str_replace(' , ',', ',$text);
 +		return $text;
 +	}
 +
 +	/**
 +	 * Reads a page and derives its namespace, class name and link from
 +	 * its path relative to $base.
 +	 *
 +	 * The relative path is expected to look like "/Name-Space/Class.html";
 +	 * dashes become dots, so that example yields namespace "Name.Space".
 +	 *
 +	 * @param string $file path of the page.
 +	 * @param string $base base directory removed from $file.
 +	 * @return array keys 'content', 'namespace', 'class' and 'link'.
 +	 */
 +	public function get_details($file, $base)
 +	{
 +		$result = array();
 +		$result['content'] = $this->get_doc_content($file);
 +
 +		$find = array($base, '.html', '-');
 +		$replace = array('', '', '.');
 +		//split on either kind of directory separator.
 +		$path = preg_split('/\/|\\\/', str_replace($find, $replace, $file));
 +
 +		//$path[0] is the empty string in front of the leading separator.
 +		$result['namespace'] = $path[1];
 +		$result['class'] = $path[2];
 +		$result['link'] = self::API_URL.$path[1].'/'.$path[2].'.html';
 +		return $result;
 +	}
 +}
 +
 +
 +?>
\ No newline at end of file diff --git a/buildscripts/index/build.php b/buildscripts/index/build.php new file mode 100644 index 00000000..9ec0d659 --- /dev/null +++ b/buildscripts/index/build.php @@ -0,0 +1,65 @@ +<?php
 +/*
 + * Created on 10/05/2006
 + */
 +
 +/**
 + * Building search index for quickstart tutorials and the API documentation.
 + */
 +
 +
 +//all locations are resolved relative to this script, not the working directory.
 +$base_dir = dirname(__FILE__);
 +
 +//quickstart source and the index data target directories.
 +$quickstart_source = realpath($base_dir.'/../texbuilder/pages.php');
 +$quickstart_base = realpath($base_dir.'/../../demos/quickstart/protected/pages/');
 +$quickstart_target = realpath($base_dir.'/../../demos/quickstart/protected/index/quickstart/');
 +
 +//API source and the index data target directories.
 +$api_source = realpath($base_dir.'/../../build/docs/manual/');
 +$api_target = realpath($base_dir.'/../../demos/quickstart/protected/index/api/');
 +
 +//get the ZEND framework
 +$zend_path = realpath($base_dir.'/../../demos/quickstart/protected/index');
 +//PATH_SEPARATOR is ';' on Windows and ':' elsewhere, so the path works on both.
 +set_include_path(get_include_path().PATH_SEPARATOR.$zend_path);
 +require_once ('Zend/Search/Lucene.php');
 +
 +//get the indexers; file names are spelled exactly as on disk so that
 +//case-sensitive filesystems can find them.
 +require_once ($base_dir.'/quickstart_index.php');
 +require_once ($base_dir.'/api_index.php');
 +
 +if(isset($argv[1]))
 +{
 +	$command = strtolower($argv[1]);
 +	if($command === "quickstart")
 +	{
 +		$quickstart = new quickstart_index($quickstart_target, $quickstart_base, $quickstart_source);
 +		$quickstart->create_index();
 +	}
 +	else if($command === "api")
 +	{
 +		$api = new api_index($api_target, $api_source);
 +		$api->create_index();
 +	}
 +	else
 +	{
 +		//any other argument is treated as a search query against both indexes.
 +		$query = $argv[1];
 +
 +		$q = new Zend_Search_Lucene($quickstart_target);
 +		$hits = $q->find(strtolower($query));
 +		echo "Found ".count($hits)." for ".$query." in quick start\n";
 +		foreach($hits as $hit)
 +			echo "   ".$hit->title."\n";
 +
 +		$a = new Zend_Search_Lucene($api_target);
 +		$hits = $a->find(strtolower($query));
 +		echo "\nFound ".count($hits)." for ".$query." in API\n";
 +		foreach($hits as $hit)
 +		{
 +			echo "   ".$hit->link."\n";
 +		}
 +	}
 +}
 +else
 +{
 +	echo "Usage: 'php build.php quickstart' or 'php build.php api'\n";
 +}
 +
 +?>
\ No newline at end of file diff --git a/buildscripts/index/quickstart_index.php b/buildscripts/index/quickstart_index.php new file mode 100644 index 00000000..565734ef --- /dev/null +++ b/buildscripts/index/quickstart_index.php @@ -0,0 +1,109 @@ +<?php
 +
 +/**
 + * Builds a Zend_Search_Lucene index over the quickstart tutorial pages.
 + *
 + * Each page is split at its h1/h2/h3 headings and every heading, together
 + * with the text that follows it, becomes one document in the index.
 + */
 +class quickstart_index
 +{
 +	/** Zend_Search_Lucene instance that receives the documents. */
 +	private $_index;
 +
 +	/** Index location, kept for the message printed by commit(). */
 +	private $_dir;
 +
 +	/** Directory that holds the tutorial pages. */
 +	private $_base;
 +
 +	/** PHP file that returns the chapter => sections list when included. */
 +	private $_source;
 +
 +	/**
 +	 * @param string $index_file location passed to Zend_Search_Lucene.
 +	 * @param string $base directory that holds the tutorial pages.
 +	 * @param string $source PHP file returning the list of pages.
 +	 */
 +	public function __construct($index_file, $base, $source)
 +	{
 +		//second argument is Zend_Search_Lucene's "create" flag.
 +		$this->_index = new Zend_Search_Lucene($index_file, true);
 +		$this->_dir = $index_file;
 +		$this->_base = $base;
 +		$this->_source = $source;
 +	}
 +
 +	/**
 +	 * Indexes every page listed by the source file and commits the index,
 +	 * echoing progress. Listed pages that do not exist are skipped.
 +	 */
 +	public function create_index()
 +	{
 +		echo "Building search index...\n";
 +		$pages = include($this->_source);
 +		$count = 0;
 +		foreach($pages as $sections)
 +		{
 +			foreach($sections as $section)
 +			{
 +				$page = $this->_base.'/'.$section;
 +				//a missing page would otherwise be indexed as the value false.
 +				if(!is_file($page))
 +				{
 +					echo "    Skipping $section (file not found)\n";
 +					continue;
 +				}
 +				echo "    Adding $section\n";
 +				$this->add(file_get_contents($page), $section, filemtime($page));
 +				$count++;
 +			}
 +		}
 +
 +		$this->_index->commit();
 +		echo "\n {$count} files indexed.\n";
 +	}
 +
 +	/**
 +	 * Adds one document per heading found in $content.
 +	 *
 +	 * @param string $content page markup.
 +	 * @param string $section page path relative to the pages directory.
 +	 * @param int $mtime modification time stored with every document.
 +	 */
 +	public function add($content, $section, $mtime)
 +	{
 +		//the page part of the link is the same for every heading of the page.
 +		$page_link = "index.php?page=".preg_replace('/\/|\\\/', '.', $section);
 +		$page_link = str_replace('.page', '', $page_link);
 +
 +		foreach($this->split_headings($content) as $headers)
 +		{
 +			$doc = new Zend_Search_Lucene_Document();
 +			$link = $page_link.'#'.$headers['section'];
 +
 +			//unsearchable text
 +			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $link));
 +			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('mtime', $mtime));
 +			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('title', $headers['title']));
 +			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('text', $headers['content']));
 +
 +			//searchable text
 +			$doc->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower($headers['title'])));
 +			$body = strtolower($this->sanitize($headers['content'])).' '.strtolower($headers['title']);
 +			$doc->addField(Zend_Search_Lucene_Field::Unstored('contents',$body));
 +			$this->_index->addDocument($doc);
 +		}
 +	}
 +
 +	/**
 +	 * Strips tags from the input and encodes the remainder as HTML entities.
 +	 *
 +	 * @param string $input raw text.
 +	 * @return string tag-free, entity-encoded text.
 +	 */
 +	public function sanitize($input)
 +	{
 +		return htmlentities(strip_tags($input));
 +	}
 +
 +	/**
 +	 * @return Zend_Search_Lucene the index being built.
 +	 */
 +	public function index()
 +	{
 +		return $this->_index;
 +	}
 +
 +	/**
 +	 * Splits page markup into one entry per h1/h2/h3 heading.
 +	 *
 +	 * Text in front of the first heading is discarded. Headings whose text
 +	 * is empty or still contains markup are not treated as split points.
 +	 *
 +	 * @param string $html page markup.
 +	 * @return array list of arrays with keys 'title' (heading text),
 +	 * 'section' (first double-quoted value in the heading tag, or '' when
 +	 * there is none) and 'content' (text following the heading).
 +	 */
 +	protected function split_headings($html)
 +	{
 +		$html = preg_replace('/<\/?com:TContent[^<]*>/', '', $html);
 +
 +		//unwrap simple inline markup so it does not hide heading text.
 +		foreach(array('b', 'i', 'tt') as $tag)
 +			$html = preg_replace('/<'.$tag.'>([^<]*)<\/'.$tag.'>/', '$1', $html);
 +
 +		//rewrite all heading levels to one marker tag, keeping attributes.
 +		foreach(array('h1', 'h2', 'h3') as $tag)
 +			$html = preg_replace('/<'.$tag.'([^>]*)>([^<]*)<\/'.$tag.'>/', '<hh$1>$2</hh>', $html);
 +
 +		$sections = preg_split('/<hh[^>]*>([^<]+)<\/hh>/', $html,-1);
 +		$headers = array();
 +		preg_match_all('/<hh([^>]*)>([^<]+)<\/hh>/', $html, $headers);
 +
 +		$contents = array();
 +		//$sections[0] precedes the first heading, so heading $i-1 owns $sections[$i].
 +		for($i = 1, $t = count($sections); $i < $t; $i++)
 +		{
 +			$anchor = array();
 +			$has_anchor = preg_match('/"([^"]*)"/', $headers[1][$i-1], $anchor);
 +			$contents[] = array(
 +				'title' => trim($this->sanitize($headers[2][$i-1])),
 +				//a heading without a quoted attribute gets an empty anchor.
 +				'section' => $has_anchor ? $anchor[1] : '',
 +				'content' => trim($this->sanitize($sections[$i])),
 +			);
 +		}
 +
 +		return $contents;
 +	}
 +
 +	/**
 +	 * Commits the index and echoes its document count and location.
 +	 */
 +	public function commit()
 +	{
 +		$this->_index->commit();
 +		$count = $this->_index->count();
 +		echo "\nSaving search index ({$count}) to {$this->_dir}\n\n";
 +	}
 +}
 +?>
\ No newline at end of file  | 
