Revision 504
Added by Matthias over 18 years ago
| functions.php | ||
|---|---|---|
| 338 | 338 |
return $subs; |
| 339 | 339 |
} |
| 340 | 340 |
|
| 341 |
// Function as replecement for php's htmlspecialchars()
|
|
| 341 |
// Function as replacement for php's htmlspecialchars()
|
|
| 342 | 342 |
function my_htmlspecialchars($string) {
|
| 343 | 343 |
$string = preg_replace("/&(?=[#a-z0-9]+;)/i", "_x_", $string);
|
| 344 | 344 |
$string = strtr($string, array("<"=>"&lt;", ">"=>"&gt;", "&"=>"&amp;", "\""=>"&quot;", "\'"=>"&#39;"));
|
| ... | ... | |
| 613 | 613 |
); |
| 614 | 614 |
|
| 615 | 615 |
if ($in == 'HTML-ENTITIES') {
|
| 616 |
$string = strtr($string, array('&#039;'=>'&#39;')); // fix a broken entity
|
|
| 617 | 616 |
$string = strtr($string, $named_to_numbered_entities); |
| 618 | 617 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
| 619 | 618 |
} |
| 620 | 619 |
elseif ($out == 'HTML-ENTITIES') {
|
| 621 |
//$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
| 622 | 620 |
$char = ""; |
| 623 |
while (strlen($string) > 0) {
|
|
| 624 |
preg_match("/^(.)(.*)$/su", $string, $match);
|
|
| 625 |
if (strlen($match[1]) > 1) {
|
|
| 626 |
$char .= "&#".uniord($match[1]).";"; |
|
| 627 |
} else $char .= $match[1]; |
|
| 628 |
$string = $match[2]; |
|
| 629 |
} |
|
| 621 |
$i=0; |
|
| 622 |
$len=strlen($string); |
|
| 623 |
if($len==0) return $string; |
|
| 624 |
do {
|
|
| 625 |
if(ord($string{$i}) <= 127) $ud = $string{$i++};
|
|
| 626 |
elseif(ord($string{$i}) <= 223) $ud = (ord($string{$i++})-192)*64 + (ord($string{$i++})-128);
|
|
| 627 |
elseif(ord($string{$i}) <= 239) $ud = (ord($string{$i++})-224)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128);
|
|
| 628 |
elseif(ord($string{$i}) <= 247) $ud = (ord($string{$i++})-240)*262144 + (ord($string{$i++})-128)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128);
|
|
| 629 |
elseif(ord($string{$i}) <= 251) $ud = ord($string{$i++}); // error!
|
|
| 630 |
if($ud > 127) {
|
|
| 631 |
$char .= "&#$ud;"; |
|
| 632 |
} else {
|
|
| 633 |
$char .= $ud; |
|
| 634 |
} |
|
| 635 |
} while($i < $len); |
|
| 630 | 636 |
$string = $char; |
| 631 | 637 |
$string = strtr($string, $numbered_to_named_entities); |
| 632 | 638 |
// do ' and " |
| ... | ... | |
| 636 | 642 |
} |
| 637 | 643 |
|
| 638 | 644 |
// support-function for string_decode_encode_entities() |
| 639 |
function uniord($c) {
|
|
| 640 |
$ud = 0; |
|
| 641 |
if (ord($c{0}) >= 0 && ord($c{0}) <= 127) $ud = ord($c{0});
|
|
| 642 |
if (ord($c{0}) >= 192 && ord($c{0}) <= 223) $ud = (ord($c{0})-192)*64 + (ord($c{1})-128);
|
|
| 643 |
if (ord($c{0}) >= 224 && ord($c{0}) <= 239) $ud = (ord($c{0})-224)*4096 + (ord($c{1})-128)*64 + (ord($c{2})-128);
|
|
| 644 |
if (ord($c{0}) >= 240 && ord($c{0}) <= 247) $ud = (ord($c{0})-240)*262144 + (ord($c{1})-128)*4096 + (ord($c{2})-128)*64 + (ord($c{3})-128);
|
|
| 645 |
if (ord($c{0}) >= 248 && ord($c{0}) <= 251) $ud = (ord($c{0})-248)*16777216 + (ord($c{1})-128)*262144 + (ord($c{2})-128)*4096 + (ord($c{3})-128)*64 + (ord($c{4})-128);
|
|
| 646 |
if (ord($c{0}) >= 252 && ord($c{0}) <= 253) $ud = (ord($c{0})-252)*1073741824 + (ord($c{1})-128)*16777216 + (ord($c{2})-128)*262144 + (ord($c{3})-128)*4096 + (ord($c{4})-128)*64 + (ord($c{5})-128);
|
|
| 647 |
if (ord($c{0}) >= 254 && ord($c{0}) <= 255) $ud = false; // error
|
|
| 648 |
return $ud; |
|
| 649 |
} |
|
| 650 |
// support-function for mb_convert_encoding_wrapper() |
|
| 651 | 645 |
function code_to_utf8($num) {
|
| 652 | 646 |
if ($num <= 0x7F) {
|
| 653 | 647 |
return chr($num); |
| ... | ... | |
| 706 | 700 |
} |
| 707 | 701 |
|
| 708 | 702 |
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts |
| 709 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
|
| 703 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET) {
|
|
| 710 | 704 |
$charset_out = strtoupper($charset_out); |
| 711 | 705 |
if ($charset_out == '') { $charset_out = 'ISO-8859-1'; }
|
| 712 | 706 |
$charset_in = strtoupper(DEFAULT_CHARSET); |
| 713 |
|
|
| 714 |
// string to utf-8 |
|
| 715 |
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') {
|
|
| 707 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
| 708 |
global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8; |
|
| 709 |
global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11; |
|
| 710 |
|
|
| 711 |
// string to utf-8, entities_to_utf8 |
|
| 712 |
if (substr($charset_in,0,8) == 'ISO-8859' || $charset_in == 'UTF-8') {
|
|
| 716 | 713 |
if ($charset_in == 'ISO-8859-1') {
|
| 717 | 714 |
$string=utf8_encode($string); |
| 715 |
} elseif ($charset_in == 'ISO-8859-2') {
|
|
| 716 |
$string = strtr($string, $iso_8859_2_to_utf8); |
|
| 717 |
} elseif ($charset_in == 'ISO-8859-3') {
|
|
| 718 |
$string = strtr($string, $iso_8859_3_to_utf8); |
|
| 719 |
} elseif ($charset_in == 'ISO-8859-4') {
|
|
| 720 |
$string = strtr($string, $iso_8859_4_to_utf8); |
|
| 721 |
} elseif ($charset_in == 'ISO-8859-5') {
|
|
| 722 |
$string = strtr($string, $iso_8859_5_to_utf8); |
|
| 723 |
} elseif ($charset_in == 'ISO-8859-6') {
|
|
| 724 |
$string = strtr($string, $iso_8859_6_to_utf8); |
|
| 725 |
} elseif ($charset_in == 'ISO-8859-7') {
|
|
| 726 |
$string = strtr($string, $iso_8859_7_to_utf8); |
|
| 727 |
} elseif ($charset_in == 'ISO-8859-8') {
|
|
| 728 |
$string = strtr($string, $iso_8859_8_to_utf8); |
|
| 729 |
} elseif ($charset_in == 'ISO-8859-9') {
|
|
| 730 |
$string = strtr($string, $iso_8859_9_to_utf8); |
|
| 731 |
} elseif ($charset_in == 'ISO-8859-10') {
|
|
| 732 |
$string = strtr($string, $iso_8859_10_to_utf8); |
|
| 733 |
} elseif ($charset_in == 'ISO-8859-11') {
|
|
| 734 |
$string = strtr($string, $iso_8859_11_to_utf8); |
|
| 718 | 735 |
} |
| 719 | 736 |
// decode html-entities |
| 720 | 737 |
if(preg_match("/&[#a-zA-Z0-9]+;/", $string)) {
|
| 721 | 738 |
$string=string_decode_encode_entities($string, 'UTF-8', 'HTML-ENTITIES'); |
| 722 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 739 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); // alternative to string_decode_encode_entities()
|
|
| 723 | 740 |
//$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
| 724 | 741 |
} |
| 725 | 742 |
} |
| ... | ... | |
| 729 | 746 |
// string to $charset_out |
| 730 | 747 |
if($charset_out == 'ISO-8859-1') {
|
| 731 | 748 |
$string=utf8_decode($string); |
| 749 |
} elseif($charset_out == 'ISO-8859-2') {
|
|
| 750 |
$string = strtr($string, $utf8_to_iso_8859_2); |
|
| 751 |
} elseif($charset_out == 'ISO-8859-3') {
|
|
| 752 |
$string = strtr($string, $utf8_to_iso_8859_3); |
|
| 753 |
} elseif($charset_out == 'ISO-8859-4') {
|
|
| 754 |
$string = strtr($string, $utf8_to_iso_8859_4); |
|
| 755 |
} elseif($charset_out == 'ISO-8859-5') {
|
|
| 756 |
$string = strtr($string, $utf8_to_iso_8859_5); |
|
| 757 |
} elseif($charset_out == 'ISO-8859-6') {
|
|
| 758 |
$string = strtr($string, $utf8_to_iso_8859_6); |
|
| 759 |
} elseif($charset_out == 'ISO-8859-7') {
|
|
| 760 |
$string = strtr($string, $utf8_to_iso_8859_7); |
|
| 761 |
} elseif($charset_out == 'ISO-8859-8') {
|
|
| 762 |
$string = strtr($string, $utf8_to_iso_8859_8); |
|
| 763 |
} elseif($charset_out == 'ISO-8859-9') {
|
|
| 764 |
$string = strtr($string, $utf8_to_iso_8859_9); |
|
| 765 |
} elseif($charset_out == 'ISO-8859-10') {
|
|
| 766 |
$string = strtr($string, $utf8_to_iso_8859_10); |
|
| 767 |
} elseif($charset_out == 'ISO-8859-11') {
|
|
| 768 |
$string = strtr($string, $utf8_to_iso_8859_11); |
|
| 769 |
} elseif($charset_out != 'UTF-8') {
|
|
| 770 |
if(is_UTF8($string)) {
|
|
| 771 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
| 772 |
} |
|
| 732 | 773 |
} |
| 733 |
elseif($charset_out != 'UTF-8' && is_UTF8($string)) {
|
|
| 734 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
| 735 |
} |
|
| 736 | 774 |
return $string; |
| 737 | 775 |
} |
| 738 | 776 |
|
| 739 | 777 |
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities |
| 740 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
|
| 778 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET) {
|
|
| 741 | 779 |
$charset_in = strtoupper($charset_in); |
| 742 | 780 |
if ($charset_in == "") { $charset_in = 'ISO-8859-1'; }
|
| 781 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
| 782 |
global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8; |
|
| 743 | 783 |
|
| 744 |
// string to utf-8 |
|
| 745 |
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') {
|
|
| 784 |
// string to utf-8, umlauts_to_entities
|
|
| 785 |
if ($charset_in == 'UTF-8' || substr($charset_in,0,8) == 'ISO-8859') {
|
|
| 746 | 786 |
if ($charset_in == 'ISO-8859-1') {
|
| 747 | 787 |
$string=utf8_encode($string); |
| 788 |
} elseif ($charset_in == 'ISO-8859-2') {
|
|
| 789 |
$string = strtr($string, $iso_8859_2_to_utf8); |
|
| 790 |
} elseif ($charset_in == 'ISO-8859-3') {
|
|
| 791 |
$string = strtr($string, $iso_8859_3_to_utf8); |
|
| 792 |
} elseif ($charset_in == 'ISO-8859-4') {
|
|
| 793 |
$string = strtr($string, $iso_8859_4_to_utf8); |
|
| 794 |
} elseif ($charset_in == 'ISO-8859-5') {
|
|
| 795 |
$string = strtr($string, $iso_8859_5_to_utf8); |
|
| 796 |
} elseif ($charset_in == 'ISO-8859-6') {
|
|
| 797 |
$string = strtr($string, $iso_8859_6_to_utf8); |
|
| 798 |
} elseif ($charset_in == 'ISO-8859-7') {
|
|
| 799 |
$string = strtr($string, $iso_8859_7_to_utf8); |
|
| 800 |
} elseif ($charset_in == 'ISO-8859-8') {
|
|
| 801 |
$string = strtr($string, $iso_8859_8_to_utf8); |
|
| 802 |
} elseif ($charset_in == 'ISO-8859-9') {
|
|
| 803 |
$string = strtr($string, $iso_8859_9_to_utf8); |
|
| 804 |
} elseif ($charset_in == 'ISO-8859-10') {
|
|
| 805 |
$string = strtr($string, $iso_8859_10_to_utf8); |
|
| 806 |
} elseif ($charset_in == 'ISO-8859-11') {
|
|
| 807 |
$string = strtr($string, $iso_8859_11_to_utf8); |
|
| 748 | 808 |
} |
| 749 | 809 |
// encode html-entities |
| 750 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); // this is very slow!
|
|
| 810 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 751 | 811 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
| 752 | 812 |
} |
| 753 | 813 |
else {
|
| ... | ... | |
| 764 | 824 |
function umlauts_to_defcharset($string, $charset) {
|
| 765 | 825 |
$charset_out = strtoupper(DEFAULT_CHARSET); |
| 766 | 826 |
if ($charset_out == "") { $charset_out = 'ISO-8859-1'; }
|
| 827 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
| 828 |
global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11; |
|
| 767 | 829 |
|
| 768 | 830 |
if($charset_out == $charset) {
|
| 769 | 831 |
return $string; |
| 770 | 832 |
} |
| 771 |
if($charset_out == 'ISO-8859-1' && $charset == 'UTF-8') {
|
|
| 772 |
$string = utf8_decode($string); |
|
| 833 |
|
|
| 834 |
if($charset == 'UTF-8') {
|
|
| 835 |
if($charset_out == 'ISO-8859-1') {
|
|
| 836 |
$string = utf8_decode($string); |
|
| 837 |
} elseif ($charset_out == 'ISO-8859-2') {
|
|
| 838 |
$string = strtr($string, $utf8_to_iso_8859_2); |
|
| 839 |
} elseif ($charset_out == 'ISO-8859-3') {
|
|
| 840 |
$string = strtr($string, $utf8_to_iso_8859_3); |
|
| 841 |
} elseif ($charset_out == 'ISO-8859-4') {
|
|
| 842 |
$string = strtr($string, $utf8_to_iso_8859_4); |
|
| 843 |
} elseif ($charset_out == 'ISO-8859-5') {
|
|
| 844 |
$string = strtr($string, $utf8_to_iso_8859_5); |
|
| 845 |
} elseif ($charset_out == 'ISO-8859-6') {
|
|
| 846 |
$string = strtr($string, $utf8_to_iso_8859_6); |
|
| 847 |
} elseif ($charset_out == 'ISO-8859-7') {
|
|
| 848 |
$string = strtr($string, $utf8_to_iso_8859_7); |
|
| 849 |
} elseif ($charset_out == 'ISO-8859-8') {
|
|
| 850 |
$string = strtr($string, $utf8_to_iso_8859_8); |
|
| 851 |
} elseif ($charset_out == 'ISO-8859-9') {
|
|
| 852 |
$string = strtr($string, $utf8_to_iso_8859_9); |
|
| 853 |
} elseif ($charset_out == 'ISO-8859-10') {
|
|
| 854 |
$string = strtr($string, $utf8_to_iso_8859_10); |
|
| 855 |
} elseif ($charset_out == 'ISO-8859-11') {
|
|
| 856 |
$string = strtr($string, $utf8_to_iso_8859_11); |
|
| 857 |
} |
|
| 858 |
else {
|
|
| 859 |
$string=mb_convert_encoding_wrapper($string, $charset_out, $charset); |
|
| 860 |
} |
|
| 773 | 861 |
} |
| 774 | 862 |
else {
|
| 775 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
|
|
| 863 |
$string=mb_convert_encoding_wrapper($string, $charset_out, $charset);
|
|
| 776 | 864 |
} |
| 777 | 865 |
|
| 778 | 866 |
return $string; |
Also available in: Unified diff
Fixed a conversion issue with some ISO charsets and sped up conversion on large pages (thanks to Thorn)