Project

General

Profile

« Previous | Next » 

Revision 881

Added by thorn about 16 years ago

search: replaced most of $string_ul_umlauts (from search_convert.php) by using PCRE's u modifier. Replaced strtr() with str_replace() (it's just faster). Changed &#39; to &#039;

View differences:

search_modext.php
66 66
function is_all_matched($text, $search_words) {
67 67
	$all_matched = true;
68 68
	foreach ($search_words AS $word) {
69
		if(!preg_match('/'.$word.'/i', $text)) {
69
		if(!preg_match('/'.$word.'/ui', $text)) {
70 70
			$all_matched = false;
71 71
			break;
72 72
		}
......
78 78
function is_any_matched($text, $search_words) {
79 79
	$any_matched = false;
80 80
	$word = '('.implode('|', $search_words).')';
81
	if(preg_match('/'.$word.'/i', $text)) {
81
	if(preg_match('/'.$word.'/ui', $text)) {
82 82
		$any_matched = true;
83 83
	}
84 84
	return $any_matched;
......
99 99
		// stop-sign: .!?; + DOUBLE EXCLAMATION MARK - INTERROBANG - EXCLAMATION QUESTION MARK - QUESTION EXCLAMATION MARK - DOUBLE QUESTION MARK - HALFWIDTH IDEOGRAPHIC FULL STOP - IDEOGRAPHIC FULL STOP - IDEOGRAPHIC COMMA
100 100
		$str2=".!?;"."\xE2\x80\xBC"."\xE2\x80\xBD"."\xE2\x81\x89"."\xE2\x81\x88"."\xE2\x81\x87"."\xEF\xBD\xA1"."\xE3\x80\x82"."\xE3\x80\x81";
101 101
	}
102
	$regex='/(?:^|\b|['.$str1.'])([^'.$str1.']{0,200}?'.$word.'[^'.$str2.']{0,200}(?:['.$str2.']|\b|$))/isu';
102
	$regex='/(?:^|\b|['.$str1.'])([^'.$str1.']{0,200}?'.$word.'[^'.$str2.']{0,200}(?:['.$str2.']|\b|$))/uis';
103 103
	if(version_compare(PHP_VERSION, '4.3.3', '>=') &&
104 104
	   strpos(strtoupper(PHP_OS), 'WIN')!==0
105 105
	) { // this may crash windows server, so skip if on windows
106 106
		// jump from match to match, get excerpt, stop if $max_excerpt_num is reached
107 107
		$last_end = 0; $offset = 0;
108
		while(preg_match('/'.$word.'/Sisu', $text, $match_array, PREG_OFFSET_CAPTURE, $last_end)) {
108
		while(preg_match('/'.$word.'/uis', $text, $match_array, PREG_OFFSET_CAPTURE, $last_end)) {
109 109
			$offset = ($match_array[0][1]-206 < $last_end)?$last_end:$match_array[0][1]-206;
110 110
			if(preg_match($regex, $text, $matches, PREG_OFFSET_CAPTURE, $offset)) {
111 111
				$last_end = $matches[1][1]+strlen($matches[1][0])-1;
......
152 152
	foreach($excerpt_array as $str) {
153 153
		$excerpt .= '#,,#'.preg_replace("/($string)/iu","#,,,,#$1#,,,,,#",$str).'#,,,#';
154 154
	}
155
	$excerpt = str_replace(array('&','<','>','"','\'',"\xC2\xA0"), array('&amp;','&lt;','&gt;','&quot;','&#39;','&nbsp;'), $excerpt);
155
	$excerpt = str_replace(array('&','<','>','"','\'',"\xC2\xA0"), array('&amp;','&lt;','&gt;','&quot;','&#039;','&nbsp;'), $excerpt);
156 156
	$excerpt = str_replace(array('#,,,,#','#,,,,,#'), array($EXCERPT_MARKUP_START,$EXCERPT_MARKUP_END), $excerpt);
157 157
	$excerpt = str_replace(array('#,,#','#,,,#'), array($EXCERPT_BEFORE,$EXCERPT_AFTER), $excerpt);
158 158
	// prepare to write out
......
170 170
	// 4. $page_link_target=="" - do nothing
171 171
	if(version_compare(PHP_VERSION, '4.3.3', ">=") && substr($page_link_target,0,12)=='#wb_section_') {
172 172
		$word = '('.implode('|', $search_words).')';
173
		preg_match('/'.$word.'/i', $text, $match, PREG_OFFSET_CAPTURE);
173
		preg_match('/'.$word.'/ui', $text, $match, PREG_OFFSET_CAPTURE);
174 174
		if($match && is_array($match[0])) {
175 175
			$x=$match[0][1]; // position of first match
176 176
			// is there an anchor nearby?
......
255 255
	if($mod_no_highlight) // no highlighting
256 256
		{ $mod_page_link_target = "&amp;nohighlight=1".$mod_page_link_target; }
257 257
	// clean the text:
258
	$mod_text = preg_replace('#<(!--.*--|style.*</style|script.*</script)>#SiU', ' ', $mod_text);
259
	$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text);
260
	$mod_text = preg_replace('/\s+/', ' ', $mod_text);
261
	$mod_text = preg_replace('/ \./', '.', $mod_text);
258
	$mod_text = preg_replace('#<(!--.*--|style.*</style|script.*</script)>#iU', ' ', $mod_text);
259
	$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#i', '.', $mod_text);
260
	$mod_text = preg_replace('/(\v\s?|\s\s)+/', ' ', $mod_text);
261
	$mod_text = preg_replace('/\s\./', '.', $mod_text);
262 262
	if($mod_ext_charset!='') { // data from external database may have a different charset
263 263
		require_once(WB_PATH.'/framework/functions-utf8.php');
264 264
		switch($mod_ext_charset) {
......
293 293
	}
294 294
	$anchor_text = $mod_text; // make an copy containing html-tags
295 295
	$mod_text = strip_tags($mod_text);
296
	$mod_text = str_replace(array('&gt;','&lt;','&amp;','&quot;','&#39;','&apos;','&nbsp;'), array('>','<','&','"','\'','\'',"\xC2\xA0"), $mod_text);
296
	$mod_text = str_replace(array('&gt;','&lt;','&amp;','&quot;','&#039;','&apos;','&nbsp;'), array('>','<','&','"','\'','\'',"\xC2\xA0"), $mod_text);
297 297
	$mod_text = '.'.trim($mod_text).'.';
298 298
	// Do a fast scan over $mod_text first. This will speedup things a lot.
299 299
	if($func_search_match == 'all') {

Also available in: Unified diff