Revision 881
Added by thorn almost 17 years ago
| search_modext.php | ||
|---|---|---|
| 66 | 66 |
function is_all_matched($text, $search_words) {
|
| 67 | 67 |
$all_matched = true; |
| 68 | 68 |
foreach ($search_words AS $word) {
|
| 69 |
if(!preg_match('/'.$word.'/i', $text)) {
|
|
| 69 |
if(!preg_match('/'.$word.'/ui', $text)) {
|
|
| 70 | 70 |
$all_matched = false; |
| 71 | 71 |
break; |
| 72 | 72 |
} |
| ... | ... | |
| 78 | 78 |
function is_any_matched($text, $search_words) {
|
| 79 | 79 |
$any_matched = false; |
| 80 | 80 |
$word = '('.implode('|', $search_words).')';
|
| 81 |
if(preg_match('/'.$word.'/i', $text)) {
|
|
| 81 |
if(preg_match('/'.$word.'/ui', $text)) {
|
|
| 82 | 82 |
$any_matched = true; |
| 83 | 83 |
} |
| 84 | 84 |
return $any_matched; |
| ... | ... | |
| 99 | 99 |
// stop-sign: .!?; + DOUBLE EXCLAMATION MARK - INTERROBANG - EXCLAMATION QUESTION MARK - QUESTION EXCLAMATION MARK - DOUBLE QUESTION MARK - HALFWIDTH IDEOGRAPHIC FULL STOP - IDEOGRAPHIC FULL STOP - IDEOGRAPHIC COMMA |
| 100 | 100 |
$str2=".!?;"."\xE2\x80\xBC"."\xE2\x80\xBD"."\xE2\x81\x89"."\xE2\x81\x88"."\xE2\x81\x87"."\xEF\xBD\xA1"."\xE3\x80\x82"."\xE3\x80\x81"; |
| 101 | 101 |
} |
| 102 |
$regex='/(?:^|\b|['.$str1.'])([^'.$str1.']{0,200}?'.$word.'[^'.$str2.']{0,200}(?:['.$str2.']|\b|$))/isu';
|
|
| 102 |
$regex='/(?:^|\b|['.$str1.'])([^'.$str1.']{0,200}?'.$word.'[^'.$str2.']{0,200}(?:['.$str2.']|\b|$))/uis';
|
|
| 103 | 103 |
if(version_compare(PHP_VERSION, '4.3.3', '>=') && |
| 104 | 104 |
strpos(strtoupper(PHP_OS), 'WIN')!==0 |
| 105 | 105 |
) { // this may crash windows server, so skip if on windows
|
| 106 | 106 |
// jump from match to match, get excerpt, stop if $max_excerpt_num is reached |
| 107 | 107 |
$last_end = 0; $offset = 0; |
| 108 |
while(preg_match('/'.$word.'/Sisu', $text, $match_array, PREG_OFFSET_CAPTURE, $last_end)) {
|
|
| 108 |
while(preg_match('/'.$word.'/uis', $text, $match_array, PREG_OFFSET_CAPTURE, $last_end)) {
|
|
| 109 | 109 |
$offset = ($match_array[0][1]-206 < $last_end)?$last_end:$match_array[0][1]-206; |
| 110 | 110 |
if(preg_match($regex, $text, $matches, PREG_OFFSET_CAPTURE, $offset)) {
|
| 111 | 111 |
$last_end = $matches[1][1]+strlen($matches[1][0])-1; |
| ... | ... | |
| 152 | 152 |
foreach($excerpt_array as $str) {
|
| 153 | 153 |
$excerpt .= '#,,#'.preg_replace("/($string)/iu","#,,,,#$1#,,,,,#",$str).'#,,,#';
|
| 154 | 154 |
} |
| 155 |
$excerpt = str_replace(array('&','<','>','"','\'',"\xC2\xA0"), array('&','<','>','"',''',' '), $excerpt);
|
|
| 155 |
$excerpt = str_replace(array('&','<','>','"','\'',"\xC2\xA0"), array('&','<','>','"',''',' '), $excerpt);
|
|
| 156 | 156 |
$excerpt = str_replace(array('#,,,,#','#,,,,,#'), array($EXCERPT_MARKUP_START,$EXCERPT_MARKUP_END), $excerpt);
|
| 157 | 157 |
$excerpt = str_replace(array('#,,#','#,,,#'), array($EXCERPT_BEFORE,$EXCERPT_AFTER), $excerpt);
|
| 158 | 158 |
// prepare to write out |
| ... | ... | |
| 170 | 170 |
// 4. $page_link_target=="" - do nothing |
| 171 | 171 |
if(version_compare(PHP_VERSION, '4.3.3', ">=") && substr($page_link_target,0,12)=='#wb_section_') {
|
| 172 | 172 |
$word = '('.implode('|', $search_words).')';
|
| 173 |
preg_match('/'.$word.'/i', $text, $match, PREG_OFFSET_CAPTURE);
|
|
| 173 |
preg_match('/'.$word.'/ui', $text, $match, PREG_OFFSET_CAPTURE);
|
|
| 174 | 174 |
if($match && is_array($match[0])) {
|
| 175 | 175 |
$x=$match[0][1]; // position of first match |
| 176 | 176 |
// is there an anchor nearby? |
| ... | ... | |
| 255 | 255 |
if($mod_no_highlight) // no highlighting |
| 256 | 256 |
{ $mod_page_link_target = "&nohighlight=1".$mod_page_link_target; }
|
| 257 | 257 |
// clean the text: |
| 258 |
$mod_text = preg_replace('#<(!--.*--|style.*</style|script.*</script)>#SiU', ' ', $mod_text);
|
|
| 259 |
$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text);
|
|
| 260 |
$mod_text = preg_replace('/\s+/', ' ', $mod_text);
|
|
| 261 |
$mod_text = preg_replace('/ \./', '.', $mod_text);
|
|
| 258 |
$mod_text = preg_replace('#<(!--.*--|style.*</style|script.*</script)>#iU', ' ', $mod_text);
|
|
| 259 |
$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#i', '.', $mod_text);
|
|
| 260 |
$mod_text = preg_replace('/(\v\s?|\s\s)+/', ' ', $mod_text);
|
|
| 261 |
$mod_text = preg_replace('/\s\./', '.', $mod_text);
|
|
| 262 | 262 |
if($mod_ext_charset!='') { // data from external database may have a different charset
|
| 263 | 263 |
require_once(WB_PATH.'/framework/functions-utf8.php'); |
| 264 | 264 |
switch($mod_ext_charset) {
|
| ... | ... | |
| 293 | 293 |
} |
| 294 | 294 |
$anchor_text = $mod_text; // make an copy containing html-tags |
| 295 | 295 |
$mod_text = strip_tags($mod_text); |
| 296 |
$mod_text = str_replace(array('>','<','&','"',''',''',' '), array('>','<','&','"','\'','\'',"\xC2\xA0"), $mod_text);
|
|
| 296 |
$mod_text = str_replace(array('>','<','&','"',''',''',' '), array('>','<','&','"','\'','\'',"\xC2\xA0"), $mod_text);
|
|
| 297 | 297 |
$mod_text = '.'.trim($mod_text).'.'; |
| 298 | 298 |
// Do a fast scan over $mod_text first. This will speedup things a lot. |
| 299 | 299 |
if($func_search_match == 'all') {
|
Also available in: Unified diff
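search: replaced most uses of $string_ul_umlauts (from search_convert.php) with PCRE's u modifier. Replaced strtr() with str_replace() (it is simply faster). Changed the HTML entity used for the apostrophe (both entity spellings are decoded to a literal ' in this view, so the affected old and new lines look identical).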