Project

General

Profile

« Previous | Next » 

Revision 869

Added by thorn about 16 years ago

search: added search_lang for use in search-form. Using DE, the search will search ä=ae,... (alternate spelling of German umlauts)
removed undocumented word-boundary search
search_path is now anchored to the beginning of link ("link LIKE '$path%'" instead of "link LIKE '%$path%'")
added key 'ext_charset' to search-extension to query external databases

View differences:

search_modext.php
101 101
	}
102 102
	$regex='/(?:^|\b|['.$str1.'])([^'.$str1.']{0,200}?'.$word.'[^'.$str2.']{0,200}(?:['.$str2.']|\b|$))/isu';
103 103
	if(version_compare(PHP_VERSION, '4.3.3', '>=') &&
104
	strpos(strtoupper(PHP_OS), 'WIN')!==0) { // this may crash windows server, so skip if on windows
104
	   strpos(strtoupper(PHP_OS), 'WIN')!==0
105
	) { // this may crash windows server, so skip if on windows
105 106
		// jump from match to match, get excerpt, stop if $max_excerpt_num is reached
106 107
		$last_end = 0; $offset = 0;
107 108
		while(preg_match('/'.$word.'/Sisu', $text, $match_array, PREG_OFFSET_CAPTURE, $last_end)) {
......
115 116
					if(count($excerpt_array) >= $max_excerpt_num)
116 117
						break;
117 118
				}
118
			} else { // problem - preg_match failed: can't find a start- or stop-sign
119
			} else { // problem: preg_match failed - can't find a start- or stop-sign
119 120
				$last_end += 201; // jump forward and try again
120 121
			}
121 122
		}
122
	} else { // compatile, but may be very slow with many large pages
123
	} else { // compatible, but may be very slow with large pages
123 124
		if(preg_match_all($regex, $text, $match_array)) {
124 125
			foreach($match_array[1] AS $string) {
125 126
				if(!preg_match('/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\./', $string)) // skip excerpts with email-addresses
......
173 174
		if($match && is_array($match[0])) {
174 175
			$x=$match[0][1]; // position of first match
175 176
			// is there an anchor nearby?
176
			if(preg_match_all('/<(?:[^>]+id|\s*a[^>]+name)\s*=\s*"(.*)"/SiU', substr($text,0,$x), $match, PREG_OFFSET_CAPTURE)) {
177
			if(preg_match_all('/<(?:[^>]+id|\s*a[^>]+name)\s*=\s*"(.*)"/iU', substr($text,0,$x), $match, PREG_OFFSET_CAPTURE)) {
177 178
				$anchor='';
178 179
				foreach($match[1] AS $array) {
179 180
					if($array[1] > $x) {
......
234 235
	if(!isset($mod_pic_link))           $mod_pic_link = "";
235 236
	if(!isset($mod_no_highlight))       $mod_no_highlight = false;
236 237
	if(!isset($func_enable_flush))      $func_enable_flush = false; // set this in db: wb_search.cfg_enable_flush [READ THE DOC BEFORE]
238
	if(isset($mod_ext_charset) && $mod_ext_charset!='utf-8') $mod_ext_charset = 'utf-8'; // only utf-8 is allowed, yet. For other charset see DOCU
237 239
	if($mod_text == "") // nothing to do
238 240
		{ return false; }
239 241
	if($mod_no_highlight) // no highlighting
......
243 245
	$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text);
244 246
	$mod_text = preg_replace('/\s+/', ' ', $mod_text);
245 247
	$mod_text = preg_replace('/ \./', '.', $mod_text);
248
	if(isset($mod_ext_charset)) { // data from external database may have a different charset
249
		require_once(WB_PATH.'/framework/functions-utf8.php');
250
		$mod_text = charset_to_utf8($mod_text, $mod_ext_charset);
251
	} else {
246 252
	$mod_text = entities_to_umlauts($mod_text, 'UTF-8');
253
	}
247 254
	$anchor_text = $mod_text; // make an copy containing html-tags
248 255
	$mod_text = strip_tags($mod_text);
249 256
	$mod_text = str_replace(array('&gt;','&lt;','&amp;','&quot;','&#39;','&apos;','&nbsp;'), array('>','<','&','"','\'','\'',"\xC2\xA0"), $mod_text);
257
	$mod_text = '.'.trim($mod_text).'.';
250 258
	// Do a fast scan over $mod_text first. This will speedup things a lot.
251 259
	if($func_search_match == 'all') {
252 260
		if(!is_all_matched($mod_text, $func_search_words))

Also available in: Unified diff