Revision 463
Added by Matthias over 17 years ago
functions.php | ||
---|---|---|
364 | 364 |
} |
365 | 365 |
|
366 | 366 |
// Function as wrapper for mb_convert_encoding |
367 |
// converts $charset_in to $charset_out or |
|
368 |
// UTF-8 to HTML-ENTITIES or HTML-ENTITIES to UTF-8 |
|
367 | 369 |
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) { |
370 |
// try mb_convert_encoding(). This can handle conversion to or from HTML-ENTITIES, too |
|
368 | 371 |
if (function_exists('mb_convert_encoding')) { |
369 |
$string=mb_convert_encoding($string, $charset_out, $charset_in); |
|
370 |
} else { |
|
371 |
if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; } |
|
372 |
elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; } |
|
373 |
elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; } //? |
|
374 |
elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; } |
|
375 |
elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; } |
|
376 |
elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = 'latin1'; } //? BROKEN |
|
377 |
elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; } |
|
378 |
elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; } |
|
379 |
elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; } |
|
380 |
elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; } //? |
|
381 |
elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; } |
|
382 |
elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = 'latin1'; } //? BROKEN |
|
383 |
elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = 'latin1'; } //? BROKEN |
|
384 |
elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; } |
|
385 |
elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; } |
|
386 |
elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; } |
|
387 |
else { $mysqlcharset_from = 'latin1'; } |
|
388 |
|
|
389 |
if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; } |
|
390 |
elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; } |
|
391 |
elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; } //? |
|
392 |
elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; } |
|
393 |
elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards |
|
394 |
elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = 'latin1'; } //? BROKEN |
|
395 |
elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; } |
|
396 |
elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; } |
|
397 |
elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; } |
|
398 |
elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; } //? |
|
399 |
elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; } |
|
400 |
elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = 'latin1'; } //? BROKEN |
|
401 |
elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = 'latin1'; } //? BROKEN |
|
402 |
elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; } |
|
403 |
elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; } |
|
404 |
elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; } |
|
405 |
else { $mysqlcharset_to = 'latin1'; } |
|
406 |
|
|
407 |
if ($charset_in == 'HTML-ENTITIES') { $mysqlcharset_from = 'html'; } // special-case |
|
408 |
if ($charset_out == 'HTML-ENTITIES') { $mysqlcharset_to = 'html'; } // special-case |
|
409 |
|
|
410 |
// use mysql to convert the string |
|
411 |
if ($mysqlcharset_from!="html" && $mysqlcharset_to!="html" && $mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) { |
|
412 |
$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to); |
|
413 |
if ($mysqlcharset_to == 'cp1251') { |
|
414 |
$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" ); |
|
372 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions |
|
373 |
if ($charset_in=='ISO-8859-11' || $charset_in=='GB2312') { |
|
374 |
if (function_exists('iconv')) { |
|
375 |
$string = iconv($charset_in, 'UTF-8', $string); |
|
415 | 376 |
} |
377 |
else { |
|
378 |
if ($charset_in == 'GB2312') { |
|
379 |
$string=my_mysql_iconv($string, 'gb2312', 'utf8'); |
|
380 |
} else { |
|
381 |
$string=my_mysql_iconv($string, 'tis620', 'utf8'); |
|
382 |
} |
|
383 |
} |
|
384 |
$charset_in='UTF-8'; |
|
385 |
if ($charset_out == 'UTF-8') { |
|
386 |
return $string; |
|
387 |
} |
|
416 | 388 |
} |
417 |
// do the utf8->htmlentities or htmlentities->utf8 translation |
|
418 |
if (($mysqlcharset_from=='html' && $mysqlcharset_to=='utf8') || ($mysqlcharset_from=='utf8' && $mysqlcharset_to=='html')) { |
|
419 |
if ($mysqlcharset_from == 'html') { |
|
420 |
$named_to_numbered_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â', |
|
421 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
422 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
423 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
424 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
425 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
426 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
427 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
428 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
429 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
430 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
431 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
432 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
433 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
434 |
'ø'=>'ø','þ'=>'þ'); |
|
435 |
$string = strtr($string, $named_to_numbered_entities); |
|
436 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
389 |
if ($charset_out=='ISO-8859-11' || $charset_out=='GB2312') { |
|
390 |
$string=mb_convert_encoding($string, 'UTF-8', $charset_in); |
|
391 |
if (function_exists('iconv')) { |
|
392 |
$string = iconv('UTF-8', $charset_out, $string); |
|
437 | 393 |
} |
438 |
elseif ($mysqlcharset_to == 'html') { |
|
439 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
440 |
$char = ""; |
|
441 |
while (strlen($string) > 0) { |
|
442 |
preg_match("/^(.)(.*)$/su", $string, $match); |
|
443 |
if (strlen($match[1]) > 1) { |
|
444 |
$char .= "&#".uniord($match[1]).";"; |
|
445 |
} else $char .= $match[1]; |
|
446 |
$string = $match[2]; |
|
394 |
else { |
|
395 |
if ($charset_out == 'GB2312') { |
|
396 |
$string=my_mysql_iconv($string, 'utf8', 'gb2312'); |
|
397 |
} else { |
|
398 |
$string=my_mysql_iconv($string, 'utf8', 'tis620'); |
|
447 | 399 |
} |
448 |
$string = $char; |
|
449 |
$string_htmlspecialchars_decode=array("<"=>"<", ">"=>">", "&"=>"&", """=>"\"", "''"=>"\'"); |
|
450 |
$string = strtr($string, $string_htmlspecialchars_decode); |
|
451 |
$numbered_to_named_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â', |
|
452 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
453 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
454 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
455 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
456 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
457 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
458 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
459 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
460 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
461 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
462 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
463 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
464 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
465 |
'ø'=>'ø','þ'=>'þ'); |
|
466 |
$string = strtr($string, $numbered_to_named_entities); |
|
467 | 400 |
} |
401 |
} else { |
|
402 |
$string=mb_convert_encoding($string, $charset_out, $charset_in); |
|
468 | 403 |
} |
404 |
return $string; |
|
469 | 405 |
} |
470 |
return($string); |
|
406 |
|
|
407 |
// try iconv(). This can't handle to or from HTML-ENTITIES. |
|
408 |
if (function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) { |
|
409 |
$string = iconv($charset_in, $charset_out, $string); |
|
410 |
return $string; |
|
411 |
} |
|
412 |
|
|
413 |
// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation |
|
414 |
if (($charset_in=='HTML-ENTITIES' && $charset_out=='UTF-8') || ($charset_in=='UTF-8' && $charset_out=='HTML-ENTITIES')) { |
|
415 |
$named_to_numbered_entities=array( |
|
416 |
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤', |
|
417 |
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª', |
|
418 |
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯', |
|
419 |
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´', |
|
420 |
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹', |
|
421 |
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾', |
|
422 |
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€', |
|
423 |
'Á'=>'Á','á'=>'á','Â'=>'Â', |
|
424 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
425 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
426 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
427 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
428 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
429 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
430 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
431 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
432 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
433 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
434 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
435 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
436 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
437 |
'ø'=>'ø','þ'=>'þ'); |
|
438 |
$numbered_to_named_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â', |
|
439 |
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤', |
|
440 |
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª', |
|
441 |
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯', |
|
442 |
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´', |
|
443 |
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹', |
|
444 |
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾', |
|
445 |
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€', |
|
446 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
447 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
448 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
449 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
450 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
451 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
452 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
453 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
454 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
455 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
456 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
457 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
458 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
459 |
'ø'=>'ø','þ'=>'þ'); |
|
460 |
if ($charset_in == 'HTML-ENTITIES') { |
|
461 |
$string = strtr($string, $named_to_numbered_entities); |
|
462 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
463 |
} |
|
464 |
elseif ($charset_out == 'HTML-ENTITIES') { |
|
465 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
466 |
$char = ""; |
|
467 |
while (strlen($string) > 0) { |
|
468 |
preg_match("/^(.)(.*)$/su", $string, $match); |
|
469 |
if (strlen($match[1]) > 1) { |
|
470 |
$char .= "&#".uniord($match[1]).";"; |
|
471 |
} else $char .= $match[1]; |
|
472 |
$string = $match[2]; |
|
473 |
} |
|
474 |
$string = $char; |
|
475 |
$string_htmlspecialchars_decode=array("<"=>"<", ">"=>">", "&"=>"&", """=>"\"", "'"=>"\'"); |
|
476 |
$string = strtr($string, $string_htmlspecialchars_decode); |
|
477 |
$string = strtr($string, $numbered_to_named_entities); |
|
478 |
} |
|
479 |
return $string; |
|
480 |
} |
|
481 |
|
|
482 |
// mb_convert_encoding() and iconv() aren't available, so use my_mysql_iconv() |
|
483 |
if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; } |
|
484 |
elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; } |
|
485 |
elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; } |
|
486 |
elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; } |
|
487 |
elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; } |
|
488 |
elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = ''; } //? |
|
489 |
elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; } |
|
490 |
elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; } |
|
491 |
elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; } |
|
492 |
elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; } |
|
493 |
elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; } |
|
494 |
elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = ''; } //? |
|
495 |
elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = ''; } //? |
|
496 |
elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; } |
|
497 |
elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; } |
|
498 |
elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; } |
|
499 |
else { $mysqlcharset_from = 'latin1'; } |
|
500 |
|
|
501 |
if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; } |
|
502 |
elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; } |
|
503 |
elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; } |
|
504 |
elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; } |
|
505 |
elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards |
|
506 |
elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = ''; } //? |
|
507 |
elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; } |
|
508 |
elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; } |
|
509 |
elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; } |
|
510 |
elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; } |
|
511 |
elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; } |
|
512 |
elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = ''; } //? |
|
513 |
elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = ''; } //? |
|
514 |
elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; } |
|
515 |
elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; } |
|
516 |
elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; } |
|
517 |
else { $mysqlcharset_to = 'latin1'; } |
|
518 |
|
|
519 |
if ($mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) { |
|
520 |
$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to); |
|
521 |
if ($mysqlcharset_to == 'cp1251') { |
|
522 |
$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" ); |
|
523 |
} |
|
524 |
return($string); |
|
525 |
} |
|
526 |
|
|
527 |
// $string is unchanged. This will happen if we have to deal with ISO-8859-6 or ISO-2022-JP or -KR |
|
528 |
// and mbstring _and_ iconv aren't available. |
|
529 |
return $string; |
|
471 | 530 |
} |
472 | 531 |
// support-function for mb_convert_encoding_wrapper() |
473 | 532 |
function uniord($c) { |
... | ... | |
500 | 559 |
$charset = strtoupper($charset); |
501 | 560 |
if ($charset == '') { $charset = 'ISO-8859-1'; } |
502 | 561 |
|
503 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions |
|
504 |
if ($charset == "GB2312") { |
|
505 |
$string=my_mysql_iconv($string, 'gb2312', 'utf8'); |
|
506 |
} elseif ($charset == "ISO-8859-11") { |
|
507 |
$string=my_mysql_iconv($string, 'tis620', 'utf8'); |
|
508 |
} elseif ($charset != "UTF-8") { |
|
562 |
if (!is_UTF8($string)) { |
|
509 | 563 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset); |
564 |
} else { |
|
510 | 565 |
} |
511 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
512 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
|
566 |
|
|
567 |
// check if we really got UTF-8. We don't get UTF-8 if the charset is ISO-8859-11 or GB2312 and neither mbstring nor iconv is available. |
|
568 |
if (is_UTF8($string)) { |
|
569 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
570 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
|
571 |
} else { |
|
572 |
} |
|
513 | 573 |
return($string); |
514 | 574 |
} |
515 | 575 |
|
576 |
// function to check if a string is UTF-8 |
|
577 |
function is_UTF8 ($string) { |
|
578 |
return preg_match('%^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$%xs', $string); |
|
579 |
} |
|
580 |
|
|
516 | 581 |
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts |
517 | 582 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) { |
518 | 583 |
$charset_out = strtoupper($charset_out); |
519 |
if ($charset_out == '') { |
|
520 |
$charset_out = 'ISO-8859-1'; |
|
521 |
} |
|
584 |
if ($charset_out == '') { $charset_out = 'ISO-8859-1'; } |
|
522 | 585 |
$string = string_to_utf8($string); |
523 |
if($charset_out != 'UTF-8') { |
|
524 |
if ($charset_out == "GB2312") { |
|
525 |
$string=my_mysql_iconv($string, 'utf8', 'gb2312'); |
|
526 |
} elseif ($charset_out == "ISO-8859-11") { |
|
527 |
$string=my_mysql_iconv($string, 'utf8', 'tis620'); |
|
528 |
} else { |
|
529 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
530 |
} |
|
531 |
} |
|
532 | 586 |
if($convert_htmlspecialchars == 1) { |
533 | 587 |
$string=htmlspecialchars($string); |
534 | 588 |
} |
589 |
if($charset_out!='UTF-8' && is_UTF8($string)) { |
|
590 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
591 |
} |
|
535 | 592 |
return($string); |
536 | 593 |
} |
537 | 594 |
|
538 | 595 |
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities
539 | 596 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=1) { |
540 | 597 |
$charset_in = strtoupper($charset_in); |
541 |
if ($charset_in == "") { |
|
542 |
$charset_in = 'ISO-8859-1'; |
|
543 |
} |
|
598 |
if ($charset_in == "") { $charset_in = 'ISO-8859-1'; } |
|
544 | 599 |
$string = string_to_utf8($string, $charset_in); |
545 | 600 |
if($convert_htmlspecialchars == 1) { |
546 | 601 |
$string=htmlspecialchars($string,ENT_QUOTES); |
547 | 602 |
} |
548 |
$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8'); |
|
603 |
if (is_UTF8($string)) { |
|
604 |
$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8'); |
|
605 |
} |
|
549 | 606 |
return($string); |
550 | 607 |
} |
551 | 608 |
|
552 | 609 |
// translate any latin/greek/cyrillic html-entities to their plain 7bit equivalents |
610 |
// and numbered-entities into hex |
|
553 | 611 |
function entities_to_7bit($string) { |
554 | 612 |
require(WB_PATH.'/framework/convert.php'); |
555 | 613 |
$string = strtr($string, $conversion_array); |
614 |
$string = preg_replace('/&#([0-9]+);/e', "dechex('$1')", $string); |
|
556 | 615 |
return($string); |
557 | 616 |
} |
558 | 617 |
|
Also available in: Unified diff
Added changeset [460] and [461] to branches/2.6.x