summaryrefslogtreecommitdiff
path: root/buildscripts/index/api_index.php
blob: 339cb0427a37dd90a58f7f84ed494ce341bb3bef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
<?php
/*
 * Created on 10/05/2006
 */
 
/**
 * Builds a Zend_Search_Lucene index from a directory tree of generated
 * API documentation pages.
 *
 * Only sub-directories of the API root whose name contains "System" are
 * scanned, and files whose name starts with an underscore are skipped.
 * Each remaining file becomes one document in the index.
 */
class api_index
{
	/** Prefix prepended to every stored document link. */
	const API_URL = '';

	/** @var Zend_Search_Lucene index being written to */
	private $_index;

	/** @var string root directory of the API documentation */
	private $_api;

	/**
	 * @param string $index_file location handed to Zend_Search_Lucene
	 * @param string $api        root directory of the API documentation
	 */
	public function __construct($index_file, $api)
	{
		$this->_api = $api;
		// Second argument "true" asks Zend_Search_Lucene to create the index.
		$this->_index = new Zend_Search_Lucene($index_file, true);
	}

	/**
	 * Indexes every documentation file found below the API root and commits
	 * the index. Progress is echoed to standard output.
	 *
	 * @throws RuntimeException if a directory or file cannot be read
	 */
	public function create_index()
	{
		echo "Building search index...\n";
		$files = $this->get_file_list($this->_api);
		$count = 0;
		foreach ($files as $file)
		{
			echo " processing $file...\n";
			$content = $this->get_details($file, $this->_api);

			$doc = new Zend_Search_Lucene_Document();

			$title = $content['namespace'].'.'.$content['class'];

			echo "  Adding ".$title."\n";

			// unsearchable text
			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $content['link']));
			$doc->addField(Zend_Search_Lucene_Field::UnIndexed('title', $title));

			// searchable: lower-cased page text followed by the lower-cased title
			$body = strtolower($this->sanitize($content['content']).' '.$title);
			$doc->addField(Zend_Search_Lucene_Field::Keyword('page', strtolower(str_replace('.', ' ', $title))));
			$doc->addField(Zend_Search_Lucene_Field::Unstored('contents', $body));
			$this->_index->addDocument($doc);
			$count++;
		}
		$this->_index->commit();
		echo "\n {$count} files indexed.\n";
	}

	/**
	 * Removes markup from the input and HTML-encodes what is left.
	 *
	 * @param string $input raw text
	 * @return string tag-free, entity-encoded text
	 */
	public function sanitize($input)
	{
		return htmlentities(strip_tags($input));
	}

	/**
	 * Collects the documentation files from every sub-directory of $path
	 * whose name contains "System".
	 *
	 * @param string $path API documentation root
	 * @return array list of absolute file paths
	 * @throws RuntimeException if $path or a matching sub-directory cannot be opened
	 */
	public function get_file_list($path)
	{
		$d = $this->open_dir($path);

		$files = array();
		while (false !== ($entry = $d->read()))
		{
			$filepath = $path.'/'.$entry;

			if (is_dir($filepath) && strpos($entry, 'System') !== false)
			{
				$files = array_merge($files, $this->get_files($filepath));
			}
		}

		$d->close();
		return $files;
	}

	/**
	 * Lists the regular files directly inside $path, skipping those whose
	 * name starts with an underscore.
	 *
	 * @param string $path directory to scan (not recursive)
	 * @return array list of canonical (realpath) file paths
	 * @throws RuntimeException if $path cannot be opened
	 */
	public function get_files($path)
	{
		$d = $this->open_dir($path);

		$files = array();
		while (false !== ($entry = $d->read()))
		{
			$filepath = $path.'/'.$entry;
			if (is_file($filepath) && $entry[0] !== '_')
			{
				$files[] = realpath($filepath);
			}
		}
		$d->close();
		return $files;
	}

	/**
	 * Reads a documentation page and reduces it to plain text.
	 *
	 * @param string $file path of the HTML page
	 * @return string text of the page with markup removed
	 * @throws RuntimeException if the file cannot be read
	 */
	public function get_doc_content($file)
	{
		$content = file_get_contents($file);
		if ($content === false)
		{
			// Fail loudly instead of silently indexing an empty document.
			throw new RuntimeException("Unable to read documentation file: $file");
		}

		// Mark each <h1> with tildes so the next pattern has something to stop at.
		$html = preg_replace('/<h1>/','~~~', $content);
		// Drop everything from a "<!" up to the next tilde (or the end of the text).
		$html = preg_replace('/<![^~]+/m', '', $html);
		// Drop the credit block and everything that follows it.
		$html = preg_replace('/<div class="credit">[\s\w\W\S]+/m', '', $html);
		// Turn &nbsp;, the tilde markers and whitespace runs into single spaces,
		// then collapse any whitespace runs created by that replacement.
		$html = preg_replace('/&nbsp;|~+|\s{2,}/',' ',$html);
		$html = preg_replace('/\s{2,}/',' ',$html);
		$text = strip_tags($html);
		// Tidy the " , " left behind where a tag sat in front of a comma.
		$text = str_replace(' , ',', ',$text);
		return $text;
	}

	/**
	 * Extracts the text, namespace, class name and link of a documentation
	 * page. The namespace is the first path segment below $base and the
	 * class is the second, with ".html" removed and "-" turned into ".".
	 *
	 * @param string $file path of the page, as returned by get_files()
	 * @param string $base API documentation root the page lives under
	 * @return array with the keys 'content', 'namespace', 'class' and 'link'
	 * @throws RuntimeException if the file cannot be read
	 */
	public function get_details($file, $base)
	{
		$result = array();
		$result['content'] = $this->get_doc_content($file);

		$relative = $this->strip_base($file, $base);
		$relative = str_replace(array('.html', '-'), array('', '.'), $relative);
		$path = preg_split('/\/|\\\/', $relative);

		// Segment 0 is the empty string in front of the leading separator.
		$namespace = isset($path[1]) ? $path[1] : '';
		$class = isset($path[2]) ? $path[2] : '';

		$result['namespace'] = $namespace;
		$result['class'] = $class;
		$result['link'] = self::API_URL.$namespace.'/'.$class.'.html';
		return $result;
	}

	/**
	 * Opens a directory for reading.
	 *
	 * @param string $path directory to open
	 * @return Directory handle offering read() and close()
	 * @throws RuntimeException if $path is not a directory or cannot be opened
	 */
	private function open_dir($path)
	{
		// Test with is_dir() first so a missing path raises no PHP warning.
		$handle = is_dir($path) ? dir($path) : false;
		if ($handle === false)
		{
			throw new RuntimeException("Unable to read directory: $path");
		}
		return $handle;
	}

	/**
	 * Removes the API root from the front of a file path.
	 *
	 * get_files() returns canonical paths, so the root is canonicalised as
	 * well; otherwise a relative root, a trailing slash, ".." or a symlink
	 * would stop the prefix from matching. When the canonical root is not a
	 * prefix of $file, every literal occurrence of $base is removed instead.
	 *
	 * @param string $file path of the page
	 * @param string $base API documentation root
	 * @return string path of the page relative to the root
	 */
	private function strip_base($file, $base)
	{
		$real_base = ($base !== '') ? realpath($base) : false;
		if ($real_base !== false && strpos($file, $real_base) === 0)
		{
			$rest = substr($file, strlen($real_base));
			// Accept the match only on a separator, so that a root of
			// "/docs/api" does not swallow part of "/docs/api2/...".
			if ($rest !== '' && ($rest[0] === '/' || $rest[0] === '\\'))
			{
				return $rest;
			}
		}
		return str_replace($base, '', $file);
	}
}


?>