Project

General

Profile

« Previous | Next » 

Revision 442

Added by Matthias over 17 years ago

Fixed issues with some languages when UTF8 is used.
Removed all htmlentities from the code (revoked changeset [396]) and added new functions instead.
Adapted the search and the highlighting to use the new functions.
With these changes WB can now be used with charset UTF8 for all languages.

View differences:

functions.php
338 338
	return $subs;
339 339
}
340 340

  
341
// Function as replacement for php's htmlspecialchars()
// The string is first run through umlauts_to_entities() and that result is
// then handed to entities_to_umlauts(); the outcome of the second call is returned.
function my_htmlspecialchars($string) {
	return entities_to_umlauts(umlauts_to_entities($string));
}
347

  
348
// Function to get the DEFAULT_CHARSET
// Returns the upper-cased value of the DEFAULT_CHARSET constant; the spelling
// "BIG5" is reported as "BIG-5".
function get_wbcharset() {
	$name = strtoupper(DEFAULT_CHARSET);
	return ($name === "BIG5") ? "BIG-5" : $name;
}
356

  
357
// Function to convert a string from $from- to $to-encoding, using mysql
//
// $string   the text to convert
// $from/$to MySQL character set names (the callers in this file pass
//           'utf8', 'gb2312' and 'tis620')
// Returns the converted string, or false when a character set name is not a
// plain identifier or when a required query fails.
function my_mysql_iconv($string, $from, $to) {
	// $from and $to are spliced into SET statements below, so refuse anything
	// that is not a plain identifier
	$identifier = '/\A[A-Za-z0-9_]+\z/';
	if(!preg_match($identifier, (string)$from) || !preg_match($identifier, (string)$to)) {
		return false;
	}
	// keep current character set values:
	$saved = array();
	foreach(array('character_set_client', 'character_set_results', 'collation_connection') as $variable) {
		$result = mysql_query("SELECT @@$variable");
		if($result === false) {
			// nothing has been changed yet, so there is nothing to restore
			return false;
		}
		$value = mysql_result($result,0,0);
		if($value === false) {
			return false;
		}
		$saved[$variable] = $value;
	}
	// the server converts client -> connection -> results, so sending the string
	// through a trivial SELECT performs the conversion
	mysql_query("SET character_set_client=$from");
	mysql_query("SET character_set_results=$to");
	mysql_query("SET collation_connection=utf8_unicode_ci");
	$converted_string = false;
	$string_escaped = mysql_real_escape_string($string);
	$result = mysql_query("SELECT '$string_escaped'");
	if($result !== false) {
		$converted_string = mysql_result($result,0,0);
	}
	// restore previous character set values (a saved NULL has to be written as the keyword NULL):
	foreach($saved as $variable => $value) {
		mysql_query("SET $variable=".(is_null($value) ? 'NULL' : $value));
	}
	return $converted_string;
}
374

  
375
// Function to convert a string from html-entities to umlauts
// and encode htmlspecialchars
// The target encoding is whatever get_wbcharset() reports. The result is
// passed through htmlspecialchars() before it is returned.
function entities_to_umlauts($string) {
	$charset = get_wbcharset();
	// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions;
	// maps those charsets to the names used for the mysql fallback
	$mysql_names = array('GB2312' => 'gb2312', 'ISO-8859-11' => 'tis620');
	if (!isset($mysql_names[$charset])) {
		$string = mb_convert_encoding($string, $charset, 'HTML-ENTITIES');
		return htmlspecialchars($string);
	}
	// decode the entities into UTF-8 first, then go from UTF-8 to the real charset
	$string = mb_convert_encoding($string, 'UTF-8', 'HTML-ENTITIES');
	if (function_exists('iconv')) {
		$string = iconv("UTF-8", $charset, $string);
	} else {
		$string = my_mysql_iconv($string, 'utf8', $mysql_names[$charset]);
	}
	return htmlspecialchars($string);
}
402

  
403
// Function to convert a string from umlauts to html-entities
// and encode htmlspecialchars
// The input is taken to be in the encoding reported by get_wbcharset().
function umlauts_to_entities($string) {
	$charset = get_wbcharset();
	// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions;
	// maps those charsets to the names used for the mysql fallback
	$mysql_names = array('GB2312' => 'gb2312', 'ISO-8859-11' => 'tis620');
	if (isset($mysql_names[$charset])) {
		// bring the string to UTF-8 so that mb_convert_encoding() can take over
		if (function_exists('iconv')) {
			$string = iconv($charset, "UTF-8", $string);
		} else {
			$string = my_mysql_iconv($string, $mysql_names[$charset], 'utf8');
		}
		$charset = "UTF-8";
	}
	// round-trip through HTML-ENTITIES to get UTF-8, escape the special
	// characters (including both quote styles), then turn it back into entities
	$as_entities = mb_convert_encoding($string, 'HTML-ENTITIES', $charset);
	$as_utf8 = mb_convert_encoding($as_entities, 'UTF-8', 'HTML-ENTITIES');
	$escaped = htmlspecialchars($as_utf8, ENT_QUOTES);
	return mb_convert_encoding($escaped, 'HTML-ENTITIES', 'UTF-8');
}
431

  
432
// translate any "latin" html-entities to their plain 7bit equivalents
// The replacement table is $conversion_array, which is expected to be set by
// the required framework/convert.php (not visible here - confirm).
function entities_to_7bit($string) {
	// plain require (not require_once) so the table is loaded into this
	// function's scope on every call
	require(WB_PATH.'/framework/convert.php');
	return strtr($string, $conversion_array);
}
438

  
341 439
// Function to convert a page title to a page filename
342 440
function page_filename($string) {
343
	// First, translate any non-english characters to their english equivalents
344
	require(WB_PATH.'/framework/convert.php');
345
   $string = strtr($string, $conversion_array);
441
	$string = entities_to_7bit(umlauts_to_entities($string));
346 442
	// Now replace spaces with page spacer
347 443
	$string = str_replace(' ', PAGE_SPACER, $string);
348 444
	// Now remove all bad characters
......
371 467

  
372 468
// Function to convert a desired media filename to a clean filename
373 469
function media_filename($string) {
374
	// First, translate any non-english characters to their english equivalents
375
	require(WB_PATH.'/framework/convert.php');
376
   $string = strtr($string, $conversion_array);
470
	$string = entities_to_7bit(umlauts_to_entities($string));
377 471
	// Now remove all bad characters
378 472
	$bad = array(
379 473
	'\'', // '

Also available in: Unified diff