Revision 870
Added by thorn about 17 years ago
| search_modext.php | ||
|---|---|---|
| 235 | 235 |
if(!isset($mod_pic_link)) $mod_pic_link = ""; |
| 236 | 236 |
if(!isset($mod_no_highlight)) $mod_no_highlight = false; |
| 237 | 237 |
if(!isset($func_enable_flush)) $func_enable_flush = false; // set this in db: wb_search.cfg_enable_flush [READ THE DOC BEFORE] |
| 238 |
if(isset($mod_ext_charset) && $mod_ext_charset!='utf-8') $mod_ext_charset = 'utf-8'; // only utf-8 is allowed, yet. For other charset see DOCU |
|
| 238 |
if(isset($mod_ext_charset)) $mod_ext_charset = strtolower($mod_ext_charset); |
|
| 239 |
else $mod_ext_charset = ''; |
|
| 240 |
|
|
| 239 | 241 |
if($mod_text == "") // nothing to do |
| 240 | 242 |
{ return false; }
|
| 243 |
|
|
| 241 | 244 |
if($mod_no_highlight) // no highlighting |
| 242 | 245 |
{ $mod_page_link_target = "&nohighlight=1".$mod_page_link_target; }
|
| 243 | 246 |
// clean the text: |
| ... | ... | |
| 245 | 248 |
$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text);
|
| 246 | 249 |
$mod_text = preg_replace('/\s+/', ' ', $mod_text);
|
| 247 | 250 |
$mod_text = preg_replace('/ \./', '.', $mod_text);
|
| 248 |
if(isset($mod_ext_charset)) { // data from external database may have a different charset
|
|
| 251 |
if($mod_ext_charset!='') { // data from external database may have a different charset
|
|
| 249 | 252 |
require_once(WB_PATH.'/framework/functions-utf8.php'); |
| 250 |
$mod_text = charset_to_utf8($mod_text, $mod_ext_charset); |
|
| 253 |
switch($mod_ext_charset) {
|
|
| 254 |
case 'latin1': |
|
| 255 |
case 'cp1252': |
|
| 256 |
$mod_text = charset_to_utf8($mod_text, 'CP1252'); |
|
| 257 |
break; |
|
| 258 |
case 'cp1251': |
|
| 259 |
$mod_text = charset_to_utf8($mod_text, 'CP1251'); |
|
| 260 |
break; |
|
| 261 |
case 'latin2': |
|
| 262 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-2'); |
|
| 263 |
break; |
|
| 264 |
case 'hebrew': |
|
| 265 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-8'); |
|
| 266 |
break; |
|
| 267 |
case 'greek': |
|
| 268 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-7'); |
|
| 269 |
break; |
|
| 270 |
case 'latin5': |
|
| 271 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-9'); |
|
| 272 |
break; |
|
| 273 |
case 'latin7': |
|
| 274 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-13'); |
|
| 275 |
break; |
|
| 276 |
case 'utf8': |
|
| 277 |
default: |
|
| 278 |
$mod_text = charset_to_utf8($mod_text, 'UTF-8'); |
|
| 279 |
} |
|
| 251 | 280 |
} else {
|
| 252 | 281 |
$mod_text = entities_to_umlauts($mod_text, 'UTF-8'); |
| 253 | 282 |
} |
Also available in: Unified diff
Improved ext_charset to work with latin1, cp1252, cp1251, latin2, hebrew, greek, latin5, latin7, and utf8.