Revision 464
Added by Matthias over 18 years ago
| trunk/wb/framework/functions.php | ||
|---|---|---|
| 367 | 367 |
// converts $charset_in to $charset_out or |
| 368 | 368 |
// UTF-8 to HTML-ENTITIES or HTML-ENTITIES to UTF-8 |
| 369 | 369 |
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
|
| 370 |
if ($charset_out == $charset_in) {
|
|
| 371 |
return $string; |
|
| 372 |
} |
|
| 370 | 373 |
// try mb_convert_encoding(). This can handle to or from HTML-ENTITIES, too |
| 371 | 374 |
if (function_exists('mb_convert_encoding')) {
|
| 372 | 375 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions |
| ... | ... | |
| 561 | 564 |
|
| 562 | 565 |
if (!is_UTF8($string)) {
|
| 563 | 566 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset); |
| 564 |
} else {
|
|
| 565 | 567 |
} |
| 566 |
|
|
| 567 |
// check if we really get UTF-8. We don't get UTF-8 if charset is ISO-8859-11 or GB2312 and mb_string AND iconv aren't available.
|
|
| 568 |
// check if we really get UTF-8. We don't get UTF-8 if charset is ISO-8859-6 or ISO-2022-JP/KR |
|
| 569 |
// and mb_string AND iconv aren't available. |
|
| 568 | 570 |
if (is_UTF8($string)) {
|
| 569 | 571 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
| 570 | 572 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
| 571 | 573 |
} else {
|
| 574 |
// nothing we can do here :-( |
|
| 572 | 575 |
} |
| 573 | 576 |
return($string); |
| 574 | 577 |
} |
| 575 | 578 |
|
| 576 | 579 |
// function to check if a string is UTF-8 |
| 577 |
function is_UTF8 ($string) {
|
|
| 578 |
return preg_match('%^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$%xs', $string);
|
|
| 580 |
function is_UTF8 ($str) {
|
|
| 581 |
if (strlen($str) < 4000) {
|
|
| 582 |
// see http://bugs.php.net/bug.php?id=24460 and http://bugs.php.net/bug.php?id=27070 and http://ilia.ws/archives/5-Top-10-ways-to-crash-PHP.html for this. |
|
| 583 |
// 4000 works for me ... |
|
| 584 |
return preg_match('/^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/s', $str);
|
|
| 585 |
} else {
|
|
| 586 |
$isUTF8 = true; |
|
| 587 |
while($str{0}) {
|
|
| 588 |
if (preg_match("/^[\x09\x0A\x0D\x20-\x7E]/", $str)) { $str = substr($str, 1); continue; }
|
|
| 589 |
if (preg_match("/^[\xC2-\xDF][\x80-\xBF]/", $str)) { $str = substr($str, 2); continue; }
|
|
| 590 |
if (preg_match("/^\xE0[\xA0-\xBF][\x80-\xBF]/", $str)) { $str = substr($str, 3); continue; }
|
|
| 591 |
if (preg_match("/^[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}/", $str)) { $str = substr($str, 3); continue; }
|
|
| 592 |
if (preg_match("/^\xED[\x80-\x9F][\x80-\xBF]/", $str)) { $str = substr($str, 3); continue; }
|
|
| 593 |
if (preg_match("/^\xF0[\x90-\xBF][\x80-\xBF]{2}/", $str)) { $str = substr($str, 4); continue; }
|
|
| 594 |
if (preg_match("/^[\xF1-\xF3][\x80-\xBF]{3}/", $str)) { $str = substr($str, 4); continue; }
|
|
| 595 |
if (preg_match("/^\xF4[\x80-\x8F][\x80-\xBF]{2}/", $str)) { $str = substr($str, 4); continue; }
|
|
| 596 |
if (preg_match("/^$/", $str)) { break; }
|
|
| 597 |
$isUTF8 = false; |
|
| 598 |
break; |
|
| 599 |
} |
|
| 600 |
return ($isUTF8); |
|
| 601 |
} |
|
| 579 | 602 |
} |
| 580 | 603 |
|
| 581 | 604 |
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts |
Also available in: Unified diff
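Fixed some possible page crashes which are caused by PHP when preg_match is run on long strings; is_UTF8 now checks strlen first and falls back to a step-by-step check for strings of 4000 bytes or more (see http://bugs.php.net/bug.php?id=24460 and http://bugs.php.net/bug.php?id=27070 and http://ilia.ws/archives/5-Top-10-ways-to-crash-PHP.html for this). Thanks to Thorn.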