Revision 454
Added by Matthias over 18 years ago
| functions.php | ||
|---|---|---|
| 341 | 341 |
// Function as replacement for PHP's htmlspecialchars() |
| 342 | 342 |
function my_htmlspecialchars($string) {
|
| 343 | 343 |
$string = umlauts_to_entities($string); |
| 344 |
$string = entities_to_umlauts($string); |
|
| 344 |
$string = entities_to_umlauts($string, DEFAULT_CHARSET, 1);
|
|
| 345 | 345 |
return($string); |
| 346 | 346 |
} |
| 347 | 347 |
|
| ... | ... | |
| 363 | 363 |
return $converted_string; |
| 364 | 364 |
} |
| 365 | 365 |
|
| 366 |
// Function as wrapper for mb_convert_encoding |
|
| 367 |
// Converts $string from $charset_in to $charset_out.
//
// Uses mb_convert_encoding() when the mbstring extension is loaded. Otherwise it
// falls back to:
//   - my_mysql_iconv() for conversions between two "real" charsets, and
//   - a hand-written translation for UTF-8 <-> HTML-ENTITIES.
// Any other combination involving HTML-ENTITIES is returned unchanged by the fallback.
//
// $string      the text to convert
// $charset_out target charset name (e.g. 'UTF-8', 'ISO-8859-1', 'HTML-ENTITIES')
// $charset_in  source charset name (same naming as $charset_out)
// Returns the converted string.
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
	if (function_exists('mb_convert_encoding')) {
		return mb_convert_encoding($string, $charset_out, $charset_in);
	}

	// Charset name => MySQL character set name handed to my_mysql_iconv().
	// Entries marked "?" / "BROKEN" have no real MySQL equivalent and fall back to latin1.
	$mysql_charsets = array(
		'ISO-8859-1'  => 'latin1',
		'ISO-8859-2'  => 'latin2',
		'ISO-8859-3'  => 'latin1', //?
		'ISO-8859-4'  => 'latin7',
		'ISO-8859-5'  => 'cp1251', // needs convert_cyr_string() before/after, see below
		'ISO-8859-6'  => 'latin1', //? BROKEN
		'ISO-8859-7'  => 'greek',
		'ISO-8859-8'  => 'hebrew',
		'ISO-8859-9'  => 'latin5',
		'ISO-8859-10' => 'latin1', //?
		'BIG5'        => 'big5',
		'ISO-2022-JP' => 'latin1', //? BROKEN
		'ISO-2022-KR' => 'latin1', //? BROKEN
		'GB2312'      => 'gb2312',
		'ISO-8859-11' => 'tis620',
		'UTF-8'       => 'utf8',
		// 'html' is not a MySQL charset; it only marks the entity special case.
		'HTML-ENTITIES' => 'html'
	);
	$mysqlcharset_from = isset($mysql_charsets[$charset_in]) ? $mysql_charsets[$charset_in] : 'latin1';
	$mysqlcharset_to = isset($mysql_charsets[$charset_out]) ? $mysql_charsets[$charset_out] : 'latin1';

	if ($mysqlcharset_from != 'html' && $mysqlcharset_to != 'html') {
		// use mysql to convert the string
		if ($mysqlcharset_from != $mysqlcharset_to) {
			// MySQL has no ISO-8859-5, so the text is routed through windows-1251.
			// This is done only when a conversion really happens; otherwise the
			// string would be left in windows-1251 by mistake.
			// Note: convert_cyr_string() no longer exists in PHP 8.
			if ($charset_in == 'ISO-8859-5') {
				$string = convert_cyr_string($string, "iso8859-5", "windows-1251");
			}
			$string = my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to);
			if ($charset_out == 'ISO-8859-5') {
				$string = convert_cyr_string($string, "windows-1251", "iso-8859-5");
			}
		}
		return($string);
	}

	// Named entity => numeric entity; flipped for the opposite direction.
	$named_to_numbered_entities = array(
		'&Aacute;'=>'&#193;','&aacute;'=>'&#225;','&Acirc;'=>'&#194;',
		'&acirc;'=>'&#226;','&AElig;'=>'&#198;','&aelig;'=>'&#230;','&Agrave;'=>'&#192;','&agrave;'=>'&#224;',
		'&Aring;'=>'&#197;','&aring;'=>'&#229;','&Atilde;'=>'&#195;','&atilde;'=>'&#227;','&Auml;'=>'&#196;',
		'&auml;'=>'&#228;','&Ccedil;'=>'&#199;','&ccedil;'=>'&#231;','&Eacute;'=>'&#201;','&eacute;'=>'&#233;',
		'&Ecirc;'=>'&#202;','&ecirc;'=>'&#234;','&Egrave;'=>'&#200;','&egrave;'=>'&#232;','&Euml;'=>'&#203;',
		'&euml;'=>'&#235;','&Iacute;'=>'&#205;','&iacute;'=>'&#237;','&Icirc;'=>'&#206;','&icirc;'=>'&#238;',
		'&Igrave;'=>'&#204;','&igrave;'=>'&#236;','&Iuml;'=>'&#207;','&iuml;'=>'&#239;','&Ntilde;'=>'&#209;',
		'&ntilde;'=>'&#241;','&Oacute;'=>'&#211;','&oacute;'=>'&#243;','&Ocirc;'=>'&#212;','&ocirc;'=>'&#244;',
		'&OElig;'=>'&#338;','&oelig;'=>'&#339;','&Ograve;'=>'&#210;','&ograve;'=>'&#242;','&Otilde;'=>'&#213;',
		'&otilde;'=>'&#245;','&Ouml;'=>'&#214;','&ouml;'=>'&#246;','&Scaron;'=>'&#352;','&scaron;'=>'&#353;',
		'&szlig;'=>'&#223;','&Uacute;'=>'&#218;','&uacute;'=>'&#250;','&Ucirc;'=>'&#219;','&ucirc;'=>'&#251;',
		'&Ugrave;'=>'&#217;','&ugrave;'=>'&#249;','&Uuml;'=>'&#220;','&uuml;'=>'&#252;','&Yacute;'=>'&#221;',
		'&yacute;'=>'&#253;','&Yuml;'=>'&#376;','&yuml;'=>'&#255;','&copy;'=>'&#169;','&reg;'=>'&#174;',
		'&ETH;'=>'&#208;','&times;'=>'&#215;','&Oslash;'=>'&#216;','&THORN;'=>'&#222;','&eth;'=>'&#240;',
		'&oslash;'=>'&#248;','&thorn;'=>'&#254;'
	);

	// do the utf8->htmlentities or htmlentities->utf8 translation
	if ($mysqlcharset_from == 'html' && $mysqlcharset_to == 'utf8') {
		// Normalise named entities to numeric ones, then decode every numeric entity.
		$string = strtr($string, $named_to_numbered_entities);
		$string = mb_wrapper_numeric_entities_to_utf8($string);
	}
	elseif ($mysqlcharset_from == 'utf8' && $mysqlcharset_to == 'html') {
		// Decode existing numeric entities first so that every non-ASCII
		// character goes through the same encoding step below.
		$string = mb_wrapper_numeric_entities_to_utf8($string);

		// Replace each multi-byte UTF-8 character by its numeric entity.
		// preg_split() returns false for invalid UTF-8; the bytes are then left as they are.
		$chars = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
		if ($chars !== false) {
			$encoded = "";
			foreach ($chars as $char) {
				$encoded .= (strlen($char) > 1) ? "&#".uniord($char).";" : $char;
			}
			$string = $encoded;
		}

		// NOTE(review): the last key ("&#039;" followed by a quote) can never match
		// once numeric entities have been decoded above - confirm whether plain
		// "&#039;" => "'" was intended.
		$string_htmlspecialchars_decode=array("&lt;"=>"<", "&gt;"=>">", "&amp;"=>"&", "&quot;"=>"\"", "&#039;'"=>"\'");
		$string = strtr($string, $string_htmlspecialchars_decode);

		// Prefer the named form for the entities known to the table.
		$string = strtr($string, array_flip($named_to_numbered_entities));
	}
	return($string);
}

// support-function for mb_convert_encoding_wrapper():
// replaces every decimal numeric entity (&#NNN;) in $string by the UTF-8 bytes
// produced by code_to_utf8(). The digits are parsed as a decimal integer and are
// never evaluated as PHP code.
function mb_wrapper_numeric_entities_to_utf8($string) {
	if (!preg_match_all('/&#([0-9]+);/', $string, $matches)) {
		return $string; // nothing to decode (or the match failed)
	}
	$replacements = array();
	foreach ($matches[0] as $index => $entity) {
		$replacements[$entity] = code_to_utf8((int) $matches[1][$index]);
	}
	return strtr($string, $replacements);
}
|
| 472 |
// support-function for mb_convert_encoding_wrapper() |
|
| 473 |
// Returns the Unicode code point of the first UTF-8 character in $c.
//
// $c  string whose first byte starts the character to decode
// Returns:
//   - the code point as integer for a complete 1- to 6-byte sequence
//     (the legacy 5- and 6-byte forms are still decoded),
//   - 0 for an empty string or when the first byte is a continuation byte (128-191),
//   - false when the first byte is 254/255 or the sequence is cut short.
function uniord($c) {
	$c = (string) $c;
	if ($c === '') {
		return 0;
	}
	$lead = ord($c[0]);
	if ($lead <= 127) {
		return $lead; // plain ASCII
	}
	if ($lead <= 191) {
		return 0; // continuation byte cannot start a character
	}
	if ($lead >= 254) {
		return false; // error: never valid as a lead byte
	}

	// Sequence length and the payload bits carried by the lead byte.
	if ($lead <= 223) { $length = 2; $ud = $lead - 192; }
	elseif ($lead <= 239) { $length = 3; $ud = $lead - 224; }
	elseif ($lead <= 247) { $length = 4; $ud = $lead - 240; }
	elseif ($lead <= 251) { $length = 5; $ud = $lead - 248; }
	else { $length = 6; $ud = $lead - 252; }

	if (strlen($c) < $length) {
		return false; // truncated sequence: do not read past the end of the string
	}
	// Each continuation byte contributes six more bits.
	for ($i = 1; $i < $length; $i++) {
		$ud = $ud * 64 + (ord($c[$i]) - 128);
	}
	return $ud;
}
|
| 484 |
// support-function for mb_convert_encoding_wrapper() |
|
| 485 |
// Encodes the Unicode code point $num as a UTF-8 byte sequence.
//
// $num  code point (integer or numeric string)
// Returns the 1- to 4-byte UTF-8 string, or a single space " " when $num cannot
// be represented in valid UTF-8: negative values, the UTF-16 surrogate range
// U+D800..U+DFFF, and anything above U+10FFFF.
function code_to_utf8($num) {
	$num = (int) $num;
	if ($num < 0 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF)) {
		return " ";
	}
	if ($num <= 0x7F) {
		return chr($num);
	}
	if ($num <= 0x7FF) {
		return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
	}
	if ($num <= 0xFFFF) {
		return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
	}
	return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
}
|
| 497 |
|
|
| 366 | 498 |
// Function to convert a string from mixed html-entities/umlauts to pure utf-8-umlauts |
| 367 | 499 |
function string_to_utf8($string, $charset=DEFAULT_CHARSET) {
|
| 368 | 500 |
$charset = strtoupper($charset); |
| ... | ... | |
| 373 | 505 |
$string=my_mysql_iconv($string, 'gb2312', 'utf8'); |
| 374 | 506 |
} elseif ($charset == "ISO-8859-11") {
|
| 375 | 507 |
$string=my_mysql_iconv($string, 'tis620', 'utf8'); |
| 376 |
} else {
|
|
| 377 |
$string=mb_convert_encoding($string, 'UTF-8', $charset); |
|
| 508 |
} elseif ($charset != "UTF-8") {
|
|
| 509 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset);
|
|
| 378 | 510 |
} |
| 379 |
$string=mb_convert_encoding($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 380 |
$string=mb_convert_encoding($string, 'UTF-8', 'HTML-ENTITIES'); |
|
| 511 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
|
|
| 512 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
|
|
| 381 | 513 |
return($string); |
| 382 | 514 |
} |
| 383 | 515 |
|
| ... | ... | |
| 394 | 526 |
} elseif ($charset_out == "ISO-8859-11") {
|
| 395 | 527 |
$string=my_mysql_iconv($string, 'utf8', 'tis620'); |
| 396 | 528 |
} else {
|
| 397 |
$string=mb_convert_encoding($string, $charset_out, 'UTF-8'); |
|
| 529 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
|
|
| 398 | 530 |
} |
| 399 | 531 |
} |
| 400 | 532 |
if($convert_htmlspecialchars == 1) {
|
| ... | ... | |
| 413 | 545 |
if($convert_htmlspecialchars == 1) {
|
| 414 | 546 |
$string=htmlspecialchars($string,ENT_QUOTES); |
| 415 | 547 |
} |
| 416 |
$string=mb_convert_encoding($string,'HTML-ENTITIES','UTF-8'); |
|
| 548 |
$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8');
|
|
| 417 | 549 |
return($string); |
| 418 | 550 |
} |
| 419 | 551 |
|
Also available in: Unified diff
Added a wrapper to functions.php for use when the mbstring extension is not available