Revision 870
Added by thorn about 16 years ago
search_modext.php | ||
---|---|---|
235 | 235 |
if(!isset($mod_pic_link)) $mod_pic_link = ""; |
236 | 236 |
if(!isset($mod_no_highlight)) $mod_no_highlight = false; |
237 | 237 |
if(!isset($func_enable_flush)) $func_enable_flush = false; // set this in db: wb_search.cfg_enable_flush [READ THE DOC BEFORE] |
238 |
if(isset($mod_ext_charset) && $mod_ext_charset!='utf-8') $mod_ext_charset = 'utf-8'; // only utf-8 is allowed, yet. For other charset see DOCU |
|
238 |
if(isset($mod_ext_charset)) $mod_ext_charset = strtolower($mod_ext_charset); |
|
239 |
else $mod_ext_charset = ''; |
|
240 |
|
|
239 | 241 |
if($mod_text == "") // nothing to do |
240 | 242 |
{ return false; } |
243 |
|
|
241 | 244 |
if($mod_no_highlight) // no highlighting |
242 | 245 |
{ $mod_page_link_target = "&nohighlight=1".$mod_page_link_target; } |
243 | 246 |
// clean the text: |
... | ... | |
245 | 248 |
$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text); |
246 | 249 |
$mod_text = preg_replace('/\s+/', ' ', $mod_text); |
247 | 250 |
$mod_text = preg_replace('/ \./', '.', $mod_text); |
248 |
if(isset($mod_ext_charset)) { // data from external database may have a different charset
|
|
251 |
if($mod_ext_charset!='') { // data from external database may have a different charset
|
|
249 | 252 |
require_once(WB_PATH.'/framework/functions-utf8.php'); |
250 |
$mod_text = charset_to_utf8($mod_text, $mod_ext_charset); |
|
253 |
switch($mod_ext_charset) { |
|
254 |
case 'latin1': |
|
255 |
case 'cp1252': |
|
256 |
$mod_text = charset_to_utf8($mod_text, 'CP1252'); |
|
257 |
break; |
|
258 |
case 'cp1251': |
|
259 |
$mod_text = charset_to_utf8($mod_text, 'CP1251'); |
|
260 |
break; |
|
261 |
case 'latin2': |
|
262 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-2'); |
|
263 |
break; |
|
264 |
case 'hebrew': |
|
265 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-8'); |
|
266 |
break; |
|
267 |
case 'greek': |
|
268 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-7'); |
|
269 |
break; |
|
270 |
case 'latin5': |
|
271 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-9'); |
|
272 |
break; |
|
273 |
case 'latin7': |
|
274 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-13'); |
|
275 |
break; |
|
276 |
case 'utf8': |
|
277 |
default: |
|
278 |
$mod_text = charset_to_utf8($mod_text, 'UTF-8'); |
|
279 |
} |
|
251 | 280 |
} else { |
252 | 281 |
$mod_text = entities_to_umlauts($mod_text, 'UTF-8'); |
253 | 282 |
} |
Also available in: Unified diff
Improved ext_charset handling to work with latin1, cp1252, cp1251, latin2, hebrew, greek, latin5, latin7, and utf8.