235 |
235 |
if(!isset($mod_pic_link)) $mod_pic_link = "";
|
236 |
236 |
if(!isset($mod_no_highlight)) $mod_no_highlight = false;
|
237 |
237 |
if(!isset($func_enable_flush)) $func_enable_flush = false; // set this in db: wb_search.cfg_enable_flush [READ THE DOC BEFORE]
|
238 |
|
if(isset($mod_ext_charset) && $mod_ext_charset!='utf-8') $mod_ext_charset = 'utf-8'; // only utf-8 is allowed, yet. For other charset see DOCU
|
|
238 |
if(isset($mod_ext_charset)) $mod_ext_charset = strtolower($mod_ext_charset);
|
|
239 |
else $mod_ext_charset = '';
|
|
240 |
|
239 |
241 |
if($mod_text == "") // nothing to do
|
240 |
242 |
{ return false; }
|
|
243 |
|
241 |
244 |
if($mod_no_highlight) // no highlighting
|
242 |
245 |
{ $mod_page_link_target = "&nohighlight=1".$mod_page_link_target; }
|
243 |
246 |
// clean the text:
|
... | ... | |
245 |
248 |
$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text);
|
246 |
249 |
$mod_text = preg_replace('/\s+/', ' ', $mod_text);
|
247 |
250 |
$mod_text = preg_replace('/ \./', '.', $mod_text);
|
248 |
|
if(isset($mod_ext_charset)) { // data from external database may have a different charset
|
|
251 |
if($mod_ext_charset!='') { // data from external database may have a different charset
|
249 |
252 |
require_once(WB_PATH.'/framework/functions-utf8.php');
|
250 |
|
$mod_text = charset_to_utf8($mod_text, $mod_ext_charset);
|
|
253 |
switch($mod_ext_charset) {
|
|
254 |
case 'latin1':
|
|
255 |
case 'cp1252':
|
|
256 |
$mod_text = charset_to_utf8($mod_text, 'CP1252');
|
|
257 |
break;
|
|
258 |
case 'cp1251':
|
|
259 |
$mod_text = charset_to_utf8($mod_text, 'CP1251');
|
|
260 |
break;
|
|
261 |
case 'latin2':
|
|
262 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-2');
|
|
263 |
break;
|
|
264 |
case 'hebrew':
|
|
265 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-8');
|
|
266 |
break;
|
|
267 |
case 'greek':
|
|
268 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-7');
|
|
269 |
break;
|
|
270 |
case 'latin5':
|
|
271 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-9');
|
|
272 |
break;
|
|
273 |
case 'latin7':
|
|
274 |
$mod_text = charset_to_utf8($mod_text, 'ISO-8859-13');
|
|
275 |
break;
|
|
276 |
case 'utf8':
|
|
277 |
default:
|
|
278 |
$mod_text = charset_to_utf8($mod_text, 'UTF-8');
|
|
279 |
}
|
251 |
280 |
} else {
|
252 |
281 |
$mod_text = entities_to_umlauts($mod_text, 'UTF-8');
|
253 |
282 |
}
|
Improved ext_charset handling: the external charset may now be latin1, cp1252, cp1251, latin2, hebrew, greek, latin5, latin7, or utf8.