Revision 463
Added by Matthias over 18 years ago
| functions.php | ||
|---|---|---|
| 364 | 364 |
} |
| 365 | 365 |
|
| 366 | 366 |
// Function as wrapper for mb_convert_encoding |
| 367 |
// converts $charset_in to $charset_out or |
|
| 368 |
// UTF-8 to HTML-ENTITIES or HTML-ENTITIES to UTF-8 |
|
| 367 | 369 |
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
|
| 370 |
// try mb_convert_encoding(). It can also handle conversion to or from HTML-ENTITIES |
|
| 368 | 371 |
if (function_exists('mb_convert_encoding')) {
|
| 369 |
$string=mb_convert_encoding($string, $charset_out, $charset_in); |
|
| 370 |
} else {
|
|
| 371 |
if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; }
|
|
| 372 |
elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; }
|
|
| 373 |
elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; } //?
|
|
| 374 |
elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; }
|
|
| 375 |
elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; }
|
|
| 376 |
elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = 'latin1'; } //? BROKEN
|
|
| 377 |
elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; }
|
|
| 378 |
elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; }
|
|
| 379 |
elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; }
|
|
| 380 |
elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; } //?
|
|
| 381 |
elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; }
|
|
| 382 |
elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = 'latin1'; } //? BROKEN
|
|
| 383 |
elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = 'latin1'; } //? BROKEN
|
|
| 384 |
elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; }
|
|
| 385 |
elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; }
|
|
| 386 |
elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; }
|
|
| 387 |
else { $mysqlcharset_from = 'latin1'; }
|
|
| 388 |
|
|
| 389 |
if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; }
|
|
| 390 |
elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; }
|
|
| 391 |
elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; } //?
|
|
| 392 |
elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; }
|
|
| 393 |
elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards
|
|
| 394 |
elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = 'latin1'; } //? BROKEN
|
|
| 395 |
elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; }
|
|
| 396 |
elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; }
|
|
| 397 |
elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; }
|
|
| 398 |
elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; } //?
|
|
| 399 |
elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; }
|
|
| 400 |
elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = 'latin1'; } //? BROKEN
|
|
| 401 |
elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = 'latin1'; } //? BROKEN
|
|
| 402 |
elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; }
|
|
| 403 |
elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; }
|
|
| 404 |
elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; }
|
|
| 405 |
else { $mysqlcharset_to = 'latin1'; }
|
|
| 406 |
|
|
| 407 |
if ($charset_in == 'HTML-ENTITIES') { $mysqlcharset_from = 'html'; } // special-case
|
|
| 408 |
if ($charset_out == 'HTML-ENTITIES') { $mysqlcharset_to = 'html'; } // special-case
|
|
| 409 |
|
|
| 410 |
// use mysql to convert the string |
|
| 411 |
if ($mysqlcharset_from!="html" && $mysqlcharset_to!="html" && $mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) {
|
|
| 412 |
$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to); |
|
| 413 |
if ($mysqlcharset_to == 'cp1251') {
|
|
| 414 |
$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" ); |
|
| 372 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions |
|
| 373 |
if ($charset_in=='ISO-8859-11' || $charset_in=='GB2312') {
|
|
| 374 |
if (function_exists('iconv')) {
|
|
| 375 |
$string = iconv($charset_in, 'UTF-8', $string); |
|
| 415 | 376 |
} |
| 377 |
else {
|
|
| 378 |
if ($charset_in == 'GB2312') {
|
|
| 379 |
$string=my_mysql_iconv($string, 'gb2312', 'utf8'); |
|
| 380 |
} else {
|
|
| 381 |
$string=my_mysql_iconv($string, 'tis620', 'utf8'); |
|
| 382 |
} |
|
| 383 |
} |
|
| 384 |
$charset_in='UTF-8'; |
|
| 385 |
if ($charset_out == 'UTF-8') {
|
|
| 386 |
return $string; |
|
| 387 |
} |
|
| 416 | 388 |
} |
| 417 |
// do the utf8->htmlentities or htmlentities->utf8 translation |
|
| 418 |
if (($mysqlcharset_from=='html' && $mysqlcharset_to=='utf8') || ($mysqlcharset_from=='utf8' && $mysqlcharset_to=='html')) {
|
|
| 419 |
if ($mysqlcharset_from == 'html') {
|
|
| 420 |
$named_to_numbered_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â',
|
|
| 421 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
| 422 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
| 423 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
| 424 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
| 425 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
| 426 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
| 427 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
| 428 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
| 429 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
| 430 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
| 431 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
| 432 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
| 433 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
| 434 |
'ø'=>'ø','þ'=>'þ'); |
|
| 435 |
$string = strtr($string, $named_to_numbered_entities); |
|
| 436 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
| 389 |
if ($charset_out=='ISO-8859-11' || $charset_out=='GB2312') {
|
|
| 390 |
$string=mb_convert_encoding($string, 'UTF-8', $charset_in); |
|
| 391 |
if (function_exists('iconv')) {
|
|
| 392 |
$string = iconv('UTF-8', $charset_out, $string);
|
|
| 437 | 393 |
} |
| 438 |
elseif ($mysqlcharset_to == 'html') {
|
|
| 439 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
| 440 |
$char = ""; |
|
| 441 |
while (strlen($string) > 0) {
|
|
| 442 |
preg_match("/^(.)(.*)$/su", $string, $match);
|
|
| 443 |
if (strlen($match[1]) > 1) {
|
|
| 444 |
$char .= "&#".uniord($match[1]).";"; |
|
| 445 |
} else $char .= $match[1]; |
|
| 446 |
$string = $match[2]; |
|
| 394 |
else {
|
|
| 395 |
if ($charset_out == 'GB2312') {
|
|
| 396 |
$string=my_mysql_iconv($string, 'utf8', 'gb2312'); |
|
| 397 |
} else {
|
|
| 398 |
$string=my_mysql_iconv($string, 'utf8', 'tis620'); |
|
| 447 | 399 |
} |
| 448 |
$string = $char; |
|
| 449 |
$string_htmlspecialchars_decode=array("<"=>"<", ">"=>">", "&"=>"&", """=>"\"", "''"=>"\'");
|
|
| 450 |
$string = strtr($string, $string_htmlspecialchars_decode); |
|
| 451 |
$numbered_to_named_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â',
|
|
| 452 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
| 453 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
| 454 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
| 455 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
| 456 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
| 457 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
| 458 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
| 459 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
| 460 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
| 461 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
| 462 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
| 463 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
| 464 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
| 465 |
'ø'=>'ø','þ'=>'þ'); |
|
| 466 |
$string = strtr($string, $numbered_to_named_entities); |
|
| 467 | 400 |
} |
| 401 |
} else {
|
|
| 402 |
$string=mb_convert_encoding($string, $charset_out, $charset_in); |
|
| 468 | 403 |
} |
| 404 |
return $string; |
|
| 469 | 405 |
} |
| 470 |
return($string); |
|
| 406 |
|
|
| 407 |
// try iconv(). It can't handle conversion to or from HTML-ENTITIES. |
|
| 408 |
if (function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) {
|
|
| 409 |
$string = iconv($charset_in, $charset_out, $string); |
|
| 410 |
return $string; |
|
| 411 |
} |
|
| 412 |
|
|
| 413 |
// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation |
|
| 414 |
if (($charset_in=='HTML-ENTITIES' && $charset_out=='UTF-8') || ($charset_in=='UTF-8' && $charset_out=='HTML-ENTITIES')) {
|
|
| 415 |
$named_to_numbered_entities=array( |
|
| 416 |
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤', |
|
| 417 |
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª', |
|
| 418 |
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯', |
|
| 419 |
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´', |
|
| 420 |
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹', |
|
| 421 |
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾', |
|
| 422 |
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€', |
|
| 423 |
'Á'=>'Á','á'=>'á','Â'=>'Â', |
|
| 424 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
| 425 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
| 426 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
| 427 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
| 428 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
| 429 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
| 430 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
| 431 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
| 432 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
| 433 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
| 434 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
| 435 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
| 436 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
| 437 |
'ø'=>'ø','þ'=>'þ'); |
|
| 438 |
$numbered_to_named_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â',
|
|
| 439 |
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤', |
|
| 440 |
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª', |
|
| 441 |
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯', |
|
| 442 |
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´', |
|
| 443 |
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹', |
|
| 444 |
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾', |
|
| 445 |
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€', |
|
| 446 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
| 447 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
| 448 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
| 449 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
| 450 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
| 451 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
| 452 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
| 453 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
| 454 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
| 455 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
| 456 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
| 457 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
| 458 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
| 459 |
'ø'=>'ø','þ'=>'þ'); |
|
| 460 |
if ($charset_in == 'HTML-ENTITIES') {
|
|
| 461 |
$string = strtr($string, $named_to_numbered_entities); |
|
| 462 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
| 463 |
} |
|
| 464 |
elseif ($charset_out == 'HTML-ENTITIES') {
|
|
| 465 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
| 466 |
$char = ""; |
|
| 467 |
while (strlen($string) > 0) {
|
|
| 468 |
preg_match("/^(.)(.*)$/su", $string, $match);
|
|
| 469 |
if (strlen($match[1]) > 1) {
|
|
| 470 |
$char .= "&#".uniord($match[1]).";"; |
|
| 471 |
} else $char .= $match[1]; |
|
| 472 |
$string = $match[2]; |
|
| 473 |
} |
|
| 474 |
$string = $char; |
|
| 475 |
$string_htmlspecialchars_decode=array("<"=>"<", ">"=>">", "&"=>"&", """=>"\"", "'"=>"\'");
|
|
| 476 |
$string = strtr($string, $string_htmlspecialchars_decode); |
|
| 477 |
$string = strtr($string, $numbered_to_named_entities); |
|
| 478 |
} |
|
| 479 |
return $string; |
|
| 480 |
} |
|
| 481 |
|
|
| 482 |
// mb_convert_encoding() and iconv() aren't available, so use my_mysql_iconv() |
|
| 483 |
if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; }
|
|
| 484 |
elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; }
|
|
| 485 |
elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; }
|
|
| 486 |
elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; }
|
|
| 487 |
elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; }
|
|
| 488 |
elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = ''; } //?
|
|
| 489 |
elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; }
|
|
| 490 |
elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; }
|
|
| 491 |
elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; }
|
|
| 492 |
elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; }
|
|
| 493 |
elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; }
|
|
| 494 |
elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = ''; } //?
|
|
| 495 |
elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = ''; } //?
|
|
| 496 |
elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; }
|
|
| 497 |
elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; }
|
|
| 498 |
elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; }
|
|
| 499 |
else { $mysqlcharset_from = 'latin1'; }
|
|
| 500 |
|
|
| 501 |
if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; }
|
|
| 502 |
elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; }
|
|
| 503 |
elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; }
|
|
| 504 |
elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; }
|
|
| 505 |
elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards
|
|
| 506 |
elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = ''; } //?
|
|
| 507 |
elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; }
|
|
| 508 |
elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; }
|
|
| 509 |
elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; }
|
|
| 510 |
elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; }
|
|
| 511 |
elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; }
|
|
| 512 |
elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = ''; } //?
|
|
| 513 |
elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = ''; } //?
|
|
| 514 |
elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; }
|
|
| 515 |
elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; }
|
|
| 516 |
elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; }
|
|
| 517 |
else { $mysqlcharset_to = 'latin1'; }
|
|
| 518 |
|
|
| 519 |
if ($mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) {
|
|
| 520 |
$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to); |
|
| 521 |
if ($mysqlcharset_to == 'cp1251') {
|
|
| 522 |
$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" ); |
|
| 523 |
} |
|
| 524 |
return($string); |
|
| 525 |
} |
|
| 526 |
|
|
| 527 |
// $string is unchanged. This will happen if we have to deal with ISO-8859-6 or ISO-2022-JP or -KR |
|
| 528 |
// and mbstring _and_ iconv aren't available. |
|
| 529 |
return $string; |
|
| 471 | 530 |
} |
| 472 | 531 |
// support-function for mb_convert_encoding_wrapper() |
| 473 | 532 |
function uniord($c) {
|
| ... | ... | |
| 500 | 559 |
$charset = strtoupper($charset); |
| 501 | 560 |
if ($charset == '') { $charset = 'ISO-8859-1'; }
|
| 502 | 561 |
|
| 503 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions |
|
| 504 |
if ($charset == "GB2312") {
|
|
| 505 |
$string=my_mysql_iconv($string, 'gb2312', 'utf8'); |
|
| 506 |
} elseif ($charset == "ISO-8859-11") {
|
|
| 507 |
$string=my_mysql_iconv($string, 'tis620', 'utf8'); |
|
| 508 |
} elseif ($charset != "UTF-8") {
|
|
| 562 |
if (!is_UTF8($string)) {
|
|
| 509 | 563 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset); |
| 564 |
} else {
|
|
| 510 | 565 |
} |
| 511 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 512 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
|
| 566 |
|
|
| 567 |
// check whether we really got UTF-8. We don't get UTF-8 if the charset is ISO-8859-11 or GB2312 and neither mbstring nor iconv is available. |
|
| 568 |
if (is_UTF8($string)) {
|
|
| 569 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 570 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
|
| 571 |
} else {
|
|
| 572 |
} |
|
| 513 | 573 |
return($string); |
| 514 | 574 |
} |
| 515 | 575 |
|
| 576 |
// function to check if a string is UTF-8 |
|
| 577 |
function is_UTF8 ($string) {
|
|
| 578 |
return preg_match('%^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$%xs', $string);
|
|
| 579 |
} |
|
| 580 |
|
|
| 516 | 581 |
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts |
| 517 | 582 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
| 518 | 583 |
$charset_out = strtoupper($charset_out); |
| 519 |
if ($charset_out == '') {
|
|
| 520 |
$charset_out = 'ISO-8859-1'; |
|
| 521 |
} |
|
| 584 |
if ($charset_out == '') { $charset_out = 'ISO-8859-1'; }
|
|
| 522 | 585 |
$string = string_to_utf8($string); |
| 523 |
if($charset_out != 'UTF-8') {
|
|
| 524 |
if ($charset_out == "GB2312") {
|
|
| 525 |
$string=my_mysql_iconv($string, 'utf8', 'gb2312'); |
|
| 526 |
} elseif ($charset_out == "ISO-8859-11") {
|
|
| 527 |
$string=my_mysql_iconv($string, 'utf8', 'tis620'); |
|
| 528 |
} else {
|
|
| 529 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
| 530 |
} |
|
| 531 |
} |
|
| 532 | 586 |
if($convert_htmlspecialchars == 1) {
|
| 533 | 587 |
$string=htmlspecialchars($string); |
| 534 | 588 |
} |
| 589 |
if($charset_out!='UTF-8' && is_UTF8($string)) {
|
|
| 590 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
| 591 |
} |
|
| 535 | 592 |
return($string); |
| 536 | 593 |
} |
| 537 | 594 |
|
| 538 | 595 |
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities |
| 539 | 596 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=1) {
|
| 540 | 597 |
$charset_in = strtoupper($charset_in); |
| 541 |
if ($charset_in == "") {
|
|
| 542 |
$charset_in = 'ISO-8859-1'; |
|
| 543 |
} |
|
| 598 |
if ($charset_in == "") { $charset_in = 'ISO-8859-1'; }
|
|
| 544 | 599 |
$string = string_to_utf8($string, $charset_in); |
| 545 | 600 |
if($convert_htmlspecialchars == 1) {
|
| 546 | 601 |
$string=htmlspecialchars($string,ENT_QUOTES); |
| 547 | 602 |
} |
| 548 |
$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8'); |
|
| 603 |
if (is_UTF8($string)) {
|
|
| 604 |
$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8'); |
|
| 605 |
} |
|
| 549 | 606 |
return($string); |
| 550 | 607 |
} |
| 551 | 608 |
|
| 552 | 609 |
// translate any latin/greek/cyrillic html-entities to their plain 7bit equivalents |
| 610 |
// and numbered-entities into hex |
|
| 553 | 611 |
function entities_to_7bit($string) {
|
| 554 | 612 |
require(WB_PATH.'/framework/convert.php'); |
| 555 | 613 |
$string = strtr($string, $conversion_array); |
| 614 |
$string = preg_replace('/&#([0-9]+);/e', "dechex('$1')", $string);
|
|
| 556 | 615 |
return($string); |
| 557 | 616 |
} |
| 558 | 617 |
|
Also available in: Unified diff
Added changeset [460] and [461] to branches/2.6.x