Revision 881
Added by thorn about 16 years ago
search_modext.php | ||
---|---|---|
66 | 66 |
function is_all_matched($text, $search_words) { |
67 | 67 |
$all_matched = true; |
68 | 68 |
foreach ($search_words AS $word) { |
69 |
if(!preg_match('/'.$word.'/i', $text)) { |
|
69 |
if(!preg_match('/'.$word.'/ui', $text)) {
|
|
70 | 70 |
$all_matched = false; |
71 | 71 |
break; |
72 | 72 |
} |
... | ... | |
78 | 78 |
function is_any_matched($text, $search_words) { |
79 | 79 |
$any_matched = false; |
80 | 80 |
$word = '('.implode('|', $search_words).')'; |
81 |
if(preg_match('/'.$word.'/i', $text)) { |
|
81 |
if(preg_match('/'.$word.'/ui', $text)) {
|
|
82 | 82 |
$any_matched = true; |
83 | 83 |
} |
84 | 84 |
return $any_matched; |
... | ... | |
99 | 99 |
// stop-sign: .!?; + DOUBLE EXCLAMATION MARK - INTERROBANG - EXCLAMATION QUESTION MARK - QUESTION EXCLAMATION MARK - DOUBLE QUESTION MARK - HALFWIDTH IDEOGRAPHIC FULL STOP - IDEOGRAPHIC FULL STOP - IDEOGRAPHIC COMMA |
100 | 100 |
$str2=".!?;"."\xE2\x80\xBC"."\xE2\x80\xBD"."\xE2\x81\x89"."\xE2\x81\x88"."\xE2\x81\x87"."\xEF\xBD\xA1"."\xE3\x80\x82"."\xE3\x80\x81"; |
101 | 101 |
} |
102 |
$regex='/(?:^|\b|['.$str1.'])([^'.$str1.']{0,200}?'.$word.'[^'.$str2.']{0,200}(?:['.$str2.']|\b|$))/isu';
|
|
102 |
$regex='/(?:^|\b|['.$str1.'])([^'.$str1.']{0,200}?'.$word.'[^'.$str2.']{0,200}(?:['.$str2.']|\b|$))/uis';
|
|
103 | 103 |
if(version_compare(PHP_VERSION, '4.3.3', '>=') && |
104 | 104 |
strpos(strtoupper(PHP_OS), 'WIN')!==0 |
105 | 105 |
) { // this may crash windows server, so skip if on windows |
106 | 106 |
// jump from match to match, get excerpt, stop if $max_excerpt_num is reached |
107 | 107 |
$last_end = 0; $offset = 0; |
108 |
while(preg_match('/'.$word.'/Sisu', $text, $match_array, PREG_OFFSET_CAPTURE, $last_end)) {
|
|
108 |
while(preg_match('/'.$word.'/uis', $text, $match_array, PREG_OFFSET_CAPTURE, $last_end)) {
|
|
109 | 109 |
$offset = ($match_array[0][1]-206 < $last_end)?$last_end:$match_array[0][1]-206; |
110 | 110 |
if(preg_match($regex, $text, $matches, PREG_OFFSET_CAPTURE, $offset)) { |
111 | 111 |
$last_end = $matches[1][1]+strlen($matches[1][0])-1; |
... | ... | |
152 | 152 |
foreach($excerpt_array as $str) { |
153 | 153 |
$excerpt .= '#,,#'.preg_replace("/($string)/iu","#,,,,#$1#,,,,,#",$str).'#,,,#'; |
154 | 154 |
} |
155 |
$excerpt = str_replace(array('&','<','>','"','\'',"\xC2\xA0"), array('&','<','>','"',''',' '), $excerpt); |
|
155 |
$excerpt = str_replace(array('&','<','>','"','\'',"\xC2\xA0"), array('&','<','>','"',''',' '), $excerpt);
|
|
156 | 156 |
$excerpt = str_replace(array('#,,,,#','#,,,,,#'), array($EXCERPT_MARKUP_START,$EXCERPT_MARKUP_END), $excerpt); |
157 | 157 |
$excerpt = str_replace(array('#,,#','#,,,#'), array($EXCERPT_BEFORE,$EXCERPT_AFTER), $excerpt); |
158 | 158 |
// prepare to write out |
... | ... | |
170 | 170 |
// 4. $page_link_target=="" - do nothing |
171 | 171 |
if(version_compare(PHP_VERSION, '4.3.3', ">=") && substr($page_link_target,0,12)=='#wb_section_') { |
172 | 172 |
$word = '('.implode('|', $search_words).')'; |
173 |
preg_match('/'.$word.'/i', $text, $match, PREG_OFFSET_CAPTURE); |
|
173 |
preg_match('/'.$word.'/ui', $text, $match, PREG_OFFSET_CAPTURE);
|
|
174 | 174 |
if($match && is_array($match[0])) { |
175 | 175 |
$x=$match[0][1]; // position of first match |
176 | 176 |
// is there an anchor nearby? |
... | ... | |
255 | 255 |
if($mod_no_highlight) // no highlighting |
256 | 256 |
{ $mod_page_link_target = "&nohighlight=1".$mod_page_link_target; } |
257 | 257 |
// clean the text: |
258 |
$mod_text = preg_replace('#<(!--.*--|style.*</style|script.*</script)>#SiU', ' ', $mod_text);
|
|
259 |
$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text);
|
|
260 |
$mod_text = preg_replace('/\s+/', ' ', $mod_text);
|
|
261 |
$mod_text = preg_replace('/ \./', '.', $mod_text);
|
|
258 |
$mod_text = preg_replace('#<(!--.*--|style.*</style|script.*</script)>#iU', ' ', $mod_text); |
|
259 |
$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#i', '.', $mod_text); |
|
260 |
$mod_text = preg_replace('/(\v\s?|\s\s)+/', ' ', $mod_text);
|
|
261 |
$mod_text = preg_replace('/\s\./', '.', $mod_text);
|
|
262 | 262 |
if($mod_ext_charset!='') { // data from external database may have a different charset |
263 | 263 |
require_once(WB_PATH.'/framework/functions-utf8.php'); |
264 | 264 |
switch($mod_ext_charset) { |
... | ... | |
293 | 293 |
} |
294 | 294 |
$anchor_text = $mod_text; // make an copy containing html-tags |
295 | 295 |
$mod_text = strip_tags($mod_text); |
296 |
$mod_text = str_replace(array('>','<','&','"',''',''',' '), array('>','<','&','"','\'','\'',"\xC2\xA0"), $mod_text); |
|
296 |
$mod_text = str_replace(array('>','<','&','"',''',''',' '), array('>','<','&','"','\'','\'',"\xC2\xA0"), $mod_text);
|
|
297 | 297 |
$mod_text = '.'.trim($mod_text).'.'; |
298 | 298 |
// Do a fast scan over $mod_text first. This will speedup things a lot. |
299 | 299 |
if($func_search_match == 'all') { |
Also available in: Unified diff
search: replaced most uses of $string_ul_umlauts (from search_convert.php) with PCRE's u modifier (UTF-8 mode). Replaced strtr() with str_replace() (it is simply faster). Changed the apostrophe entity from &apos; to &#039;.