Revision 869
Added by thorn about 17 years ago
| search_modext.php | ||
|---|---|---|
| 101 | 101 |
} |
| 102 | 102 |
$regex='/(?:^|\b|['.$str1.'])([^'.$str1.']{0,200}?'.$word.'[^'.$str2.']{0,200}(?:['.$str2.']|\b|$))/isu';
|
| 103 | 103 |
if(version_compare(PHP_VERSION, '4.3.3', '>=') && |
| 104 |
strpos(strtoupper(PHP_OS), 'WIN')!==0) { // this may crash windows server, so skip if on windows
|
|
| 104 |
strpos(strtoupper(PHP_OS), 'WIN')!==0 |
|
| 105 |
) { // this may crash windows server, so skip if on windows
|
|
| 105 | 106 |
// jump from match to match, get excerpt, stop if $max_excerpt_num is reached |
| 106 | 107 |
$last_end = 0; $offset = 0; |
| 107 | 108 |
while(preg_match('/'.$word.'/Sisu', $text, $match_array, PREG_OFFSET_CAPTURE, $last_end)) {
|
| ... | ... | |
| 115 | 116 |
if(count($excerpt_array) >= $max_excerpt_num) |
| 116 | 117 |
break; |
| 117 | 118 |
} |
| 118 |
} else { // problem - preg_match failed: can't find a start- or stop-sign
|
|
| 119 |
} else { // problem: preg_match failed - can't find a start- or stop-sign
|
|
| 119 | 120 |
$last_end += 201; // jump forward and try again |
| 120 | 121 |
} |
| 121 | 122 |
} |
| 122 |
} else { // compatile, but may be very slow with many large pages
|
|
| 123 |
} else { // compatible, but may be very slow with large pages
|
|
| 123 | 124 |
if(preg_match_all($regex, $text, $match_array)) {
|
| 124 | 125 |
foreach($match_array[1] AS $string) {
|
| 125 | 126 |
if(!preg_match('/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\./', $string)) // skip excerpts with email-addresses
|
| ... | ... | |
| 173 | 174 |
if($match && is_array($match[0])) {
|
| 174 | 175 |
$x=$match[0][1]; // position of first match |
| 175 | 176 |
// is there an anchor nearby? |
| 176 |
if(preg_match_all('/<(?:[^>]+id|\s*a[^>]+name)\s*=\s*"(.*)"/SiU', substr($text,0,$x), $match, PREG_OFFSET_CAPTURE)) {
|
|
| 177 |
if(preg_match_all('/<(?:[^>]+id|\s*a[^>]+name)\s*=\s*"(.*)"/iU', substr($text,0,$x), $match, PREG_OFFSET_CAPTURE)) {
|
|
| 177 | 178 |
$anchor=''; |
| 178 | 179 |
foreach($match[1] AS $array) {
|
| 179 | 180 |
if($array[1] > $x) {
|
| ... | ... | |
| 234 | 235 |
if(!isset($mod_pic_link)) $mod_pic_link = ""; |
| 235 | 236 |
if(!isset($mod_no_highlight)) $mod_no_highlight = false; |
| 236 | 237 |
if(!isset($func_enable_flush)) $func_enable_flush = false; // set this in db: wb_search.cfg_enable_flush [READ THE DOC BEFORE] |
| 238 |
if(isset($mod_ext_charset) && $mod_ext_charset!='utf-8') $mod_ext_charset = 'utf-8'; // only utf-8 is allowed, yet. For other charset see DOCU |
|
| 237 | 239 |
if($mod_text == "") // nothing to do |
| 238 | 240 |
{ return false; }
|
| 239 | 241 |
if($mod_no_highlight) // no highlighting |
| ... | ... | |
| 243 | 245 |
$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text);
|
| 244 | 246 |
$mod_text = preg_replace('/\s+/', ' ', $mod_text);
|
| 245 | 247 |
$mod_text = preg_replace('/ \./', '.', $mod_text);
|
| 248 |
if(isset($mod_ext_charset)) { // data from external database may have a different charset
|
|
| 249 |
require_once(WB_PATH.'/framework/functions-utf8.php'); |
|
| 250 |
$mod_text = charset_to_utf8($mod_text, $mod_ext_charset); |
|
| 251 |
} else {
|
|
| 246 | 252 |
$mod_text = entities_to_umlauts($mod_text, 'UTF-8'); |
| 253 |
} |
|
| 247 | 254 |
$anchor_text = $mod_text; // make a copy containing html-tags |
| 248 | 255 |
$mod_text = strip_tags($mod_text); |
| 249 | 256 |
$mod_text = str_replace(array('&gt;','&lt;','&amp;','&quot;','&#39;','&apos;','&nbsp;'), array('>','<','&','"','\'','\'',"\xC2\xA0"), $mod_text);
|
| 257 |
$mod_text = '.'.trim($mod_text).'.'; |
|
| 250 | 258 |
// Do a fast scan over $mod_text first. This will speedup things a lot. |
| 251 | 259 |
if($func_search_match == 'all') {
|
| 252 | 260 |
if(!is_all_matched($mod_text, $func_search_words)) |
Also available in: Unified diff
search: added search_lang for use in the search form. With DE, the search also matches ä=ae, ... (alternate spellings of German umlauts)
removed undocumented word-boundary search
search_path is now anchored to the beginning of link ("link LIKE '$path%'" instead of "link LIKE '%$path%'")
added key 'ext_charset' to search-extension to query external databases