Project

General

Profile

« Previous | Next » 

Revision 870

Added by thorn about 16 years ago

Improved ext_charset to work with latin1, cp1252, cp1251, latin2, hebrew, greek, latin5, latin7, and utf8.

View differences:

search_modext.php
235 235
	if(!isset($mod_pic_link))           $mod_pic_link = "";
236 236
	if(!isset($mod_no_highlight))       $mod_no_highlight = false;
237 237
	if(!isset($func_enable_flush))      $func_enable_flush = false; // set this in db: wb_search.cfg_enable_flush [READ THE DOC BEFORE]
238
	if(isset($mod_ext_charset) && $mod_ext_charset!='utf-8') $mod_ext_charset = 'utf-8'; // only utf-8 is allowed, yet. For other charset see DOCU
238
	if(isset($mod_ext_charset)) $mod_ext_charset = strtolower($mod_ext_charset);
239
	else $mod_ext_charset = '';
240

  
239 241
	if($mod_text == "") // nothing to do
240 242
		{ return false; }
243

  
241 244
	if($mod_no_highlight) // no highlighting
242 245
		{ $mod_page_link_target = "&nohighlight=1".$mod_page_link_target; }
243 246
	// clean the text:
......
245 248
	$mod_text = preg_replace('#<(br( /)?|dt|/dd|/?(h[1-6]|tr|table|p|li|ul|pre|code|div|hr))[^>]*>#Si', '.', $mod_text);
246 249
	$mod_text = preg_replace('/\s+/', ' ', $mod_text);
247 250
	$mod_text = preg_replace('/ \./', '.', $mod_text);
248
	if(isset($mod_ext_charset)) { // data from external database may have a different charset
251
	if($mod_ext_charset!='') { // data from external database may have a different charset
249 252
		require_once(WB_PATH.'/framework/functions-utf8.php');
250
		$mod_text = charset_to_utf8($mod_text, $mod_ext_charset);
253
		switch($mod_ext_charset) {
254
		case 'latin1':
255
		case 'cp1252':
256
			$mod_text = charset_to_utf8($mod_text, 'CP1252');
257
			break;
258
		case 'cp1251':
259
			$mod_text = charset_to_utf8($mod_text, 'CP1251');
260
			break;
261
		case 'latin2':
262
			$mod_text = charset_to_utf8($mod_text, 'ISO-8859-2');
263
			break;
264
		case 'hebrew':
265
			$mod_text = charset_to_utf8($mod_text, 'ISO-8859-8');
266
			break;
267
		case 'greek':
268
			$mod_text = charset_to_utf8($mod_text, 'ISO-8859-7');
269
			break;
270
		case 'latin5':
271
			$mod_text = charset_to_utf8($mod_text, 'ISO-8859-9');
272
			break;
273
		case 'latin7':
274
			$mod_text = charset_to_utf8($mod_text, 'ISO-8859-13');
275
			break;
276
		case 'utf8':
277
		default:
278
			$mod_text = charset_to_utf8($mod_text, 'UTF-8');
279
		}
251 280
	} else {
252 281
	$mod_text = entities_to_umlauts($mod_text, 'UTF-8');
253 282
	}

Also available in: Unified diff