Project

General

Profile

« Previous | Next » 

Revision 445

Added by Matthias about 17 years ago

Optimized search and highlighting functions

View differences:

functions.php
345 345
	return($string);
346 346
}
347 347

  
348
// Function to get the DEFAULT_CHARSET
349
function get_wbcharset() {
350
	$charset=strtoupper(DEFAULT_CHARSET);
351
	if(strcmp($charset,"BIG5") == 0) {
352
		$charset="BIG-5";
353
	}
354
	return($charset);
355
}
356

  
357 348
// Function to convert a string from $from- to $to-encoding, using mysql
358 349
function my_mysql_iconv($string, $from, $to) {
359 350
	// keep current character set values:
......
372 363
	return $converted_string;
373 364
}
374 365

  
375
// Function to convert a string from html-entities to umlauts
376
// and encode htmlspecialchars
377
function entities_to_umlauts($string) {
378
	$charset = get_wbcharset();
366
// Function to convert a string from mixed html-entities/umlauts to pure utf-8-umlauts
367
function string_to_utf8($string, $charset=DEFAULT_CHARSET) {
368
	$charset = strtoupper($charset);
369
	if ($charset == '') { $charset = 'ISO-8859-1'; }
370

  
379 371
	// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions
380
	if (strcmp($charset,"GB2312") == 0) {
381
		if(function_exists('iconv')) {
382
			$string=mb_convert_encoding($string,'UTF-8','HTML-ENTITIES');
383
			$string=iconv("UTF-8","GB2312",$string);
384
		} else {
385
			$string=mb_convert_encoding($string,'UTF-8','HTML-ENTITIES');
386
			$string=my_mysql_iconv($string, 'utf8', 'gb2312');
387
		}
388
	} elseif (strcmp($charset,"ISO-8859-11") == 0) {
389
		if(function_exists('iconv')) {
390
			$string=mb_convert_encoding($string,'UTF-8','HTML-ENTITIES');
391
			$string=iconv("UTF-8","ISO-8859-11",$string);
392
		} else {
393
			$string=mb_convert_encoding($string,'UTF-8','HTML-ENTITIES');
394
			$string=my_mysql_iconv($string, 'utf8', 'tis620');
395
		}
372
	if ($charset == "GB2312") {
373
		$string=my_mysql_iconv($string, 'gb2312', 'utf8');
374
	} elseif ($charset == "ISO-8859-11") {
375
		$string=my_mysql_iconv($string, 'tis620', 'utf8');
396 376
	} else {
397
		$string=mb_convert_encoding($string,$charset,'HTML-ENTITIES');
377
		$string=mb_convert_encoding($string, 'UTF-8', $charset);
398 378
	}
399
	$string=htmlspecialchars($string);
379
	$string=mb_convert_encoding($string, 'HTML-ENTITIES', 'UTF-8');
380
	$string=mb_convert_encoding($string, 'UTF-8', 'HTML-ENTITIES');
400 381
	return($string);
401 382
}
402 383

  
403
// Function to convert a string from umlauts to html-entities
404
// and encode htmlspecialchars
405
function umlauts_to_entities($string) {
406
	$charset=get_wbcharset();
407
	// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions
408
	if (strcmp($charset,"GB2312") == 0) {
409
		if(function_exists('iconv')) {
410
			$string=iconv("GB2312","UTF-8",$string);
411
			$charset="UTF-8";
384
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts
385
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
386
	$charset_out = strtoupper($charset_out);
387
	if ($charset_out == '') {
388
		$charset_out = 'ISO-8859-1';
389
	}
390
	$string = string_to_utf8($string);
391
	if($charset_out != 'UTF-8') {
392
		if ($charset_out == "GB2312") {
393
			$string=my_mysql_iconv($string, 'utf8', 'gb2312');
394
		} elseif ($charset_out == "ISO-8859-11") {
395
			$string=my_mysql_iconv($string, 'utf8', 'tis620');
412 396
		} else {
413
			$string=my_mysql_iconv($string, 'gb2312', 'utf8');
414
			$charset="UTF-8";
397
			$string=mb_convert_encoding($string, $charset_out, 'UTF-8');
415 398
		}
416
	} elseif (strcmp($charset,"ISO-8859-11") == 0) {
417
		if(function_exists('iconv')) {
418
			$string=iconv("ISO-8859-11","UTF-8",$string);
419
			$charset="UTF-8";
420
		} else {
421
			$string=my_mysql_iconv($string, 'tis620', 'utf8');
422
			$charset="UTF-8";
423
		}
424 399
	}
425
	$string=mb_convert_encoding($string,'HTML-ENTITIES',$charset);
426
	$string=mb_convert_encoding($string,'UTF-8','HTML-ENTITIES');
427
	$string=htmlspecialchars($string,ENT_QUOTES);
400
	if($convert_htmlspecialchars == 1) {
401
		$string=htmlspecialchars($string);
402
	}
403
	return($string);
404
}
405

  
406
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities
407
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=1) {
408
	$charset_in = strtoupper($charset_in);
409
	if ($charset_in == "") {
410
		$charset_in = 'ISO-8859-1';
411
	}
412
	$string = string_to_utf8($string, $charset_in);
413
	if($convert_htmlspecialchars == 1) {
414
		$string=htmlspecialchars($string,ENT_QUOTES);
415
	}
428 416
	$string=mb_convert_encoding($string,'HTML-ENTITIES','UTF-8');
429 417
	return($string);
430 418
}
431 419

  
432
// translate any "latin" html-entities to their plain 7bit equivalents
420
// translate any latin/greek/cyrillic html-entities to their plain 7bit equivalents
433 421
function entities_to_7bit($string) {
434 422
	require(WB_PATH.'/framework/convert.php');
435 423
	$string = strtr($string, $conversion_array);

Also available in: Unified diff