Revision 504
Added by Matthias over 17 years ago
functions.php | ||
---|---|---|
338 | 338 |
return $subs; |
339 | 339 |
} |
340 | 340 |
|
341 |
// Function as replecement for php's htmlspecialchars()
|
|
341 |
// Function as replacement for php's htmlspecialchars()
|
|
342 | 342 |
function my_htmlspecialchars($string) { |
343 | 343 |
$string = preg_replace("/&(?=[#a-z0-9]+;)/i", "_x_", $string); |
344 | 344 |
$string = strtr($string, array("<"=>"&lt;", ">"=>"&gt;", "&"=>"&amp;", "\""=>"&quot;", "\'"=>"&#39;")); |
... | ... | |
613 | 613 |
); |
614 | 614 |
|
615 | 615 |
if ($in == 'HTML-ENTITIES') { |
616 |
$string = strtr($string, array('''=>''')); // fix a broken entity |
|
617 | 616 |
$string = strtr($string, $named_to_numbered_entities); |
618 | 617 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
619 | 618 |
} |
620 | 619 |
elseif ($out == 'HTML-ENTITIES') { |
621 |
//$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
622 | 620 |
$char = ""; |
623 |
while (strlen($string) > 0) { |
|
624 |
preg_match("/^(.)(.*)$/su", $string, $match); |
|
625 |
if (strlen($match[1]) > 1) { |
|
626 |
$char .= "&#".uniord($match[1]).";"; |
|
627 |
} else $char .= $match[1]; |
|
628 |
$string = $match[2]; |
|
629 |
} |
|
621 |
$i=0; |
|
622 |
$len=strlen($string); |
|
623 |
if($len==0) return $string; |
|
624 |
do { |
|
625 |
if(ord($string{$i}) <= 127) $ud = $string{$i++}; |
|
626 |
elseif(ord($string{$i}) <= 223) $ud = (ord($string{$i++})-192)*64 + (ord($string{$i++})-128); |
|
627 |
elseif(ord($string{$i}) <= 239) $ud = (ord($string{$i++})-224)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128); |
|
628 |
elseif(ord($string{$i}) <= 247) $ud = (ord($string{$i++})-240)*262144 + (ord($string{$i++})-128)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128); |
|
629 |
elseif(ord($string{$i}) <= 251) $ud = ord($string{$i++}); // error! |
|
630 |
if($ud > 127) { |
|
631 |
$char .= "&#$ud;"; |
|
632 |
} else { |
|
633 |
$char .= $ud; |
|
634 |
} |
|
635 |
} while($i < $len); |
|
630 | 636 |
$string = $char; |
631 | 637 |
$string = strtr($string, $numbered_to_named_entities); |
632 | 638 |
// do ' and " |
... | ... | |
636 | 642 |
} |
637 | 643 |
|
638 | 644 |
// support-function for string_decode_encode_entities() |
639 |
function uniord($c) { |
|
640 |
$ud = 0; |
|
641 |
if (ord($c{0}) >= 0 && ord($c{0}) <= 127) $ud = ord($c{0}); |
|
642 |
if (ord($c{0}) >= 192 && ord($c{0}) <= 223) $ud = (ord($c{0})-192)*64 + (ord($c{1})-128); |
|
643 |
if (ord($c{0}) >= 224 && ord($c{0}) <= 239) $ud = (ord($c{0})-224)*4096 + (ord($c{1})-128)*64 + (ord($c{2})-128); |
|
644 |
if (ord($c{0}) >= 240 && ord($c{0}) <= 247) $ud = (ord($c{0})-240)*262144 + (ord($c{1})-128)*4096 + (ord($c{2})-128)*64 + (ord($c{3})-128); |
|
645 |
if (ord($c{0}) >= 248 && ord($c{0}) <= 251) $ud = (ord($c{0})-248)*16777216 + (ord($c{1})-128)*262144 + (ord($c{2})-128)*4096 + (ord($c{3})-128)*64 + (ord($c{4})-128); |
|
646 |
if (ord($c{0}) >= 252 && ord($c{0}) <= 253) $ud = (ord($c{0})-252)*1073741824 + (ord($c{1})-128)*16777216 + (ord($c{2})-128)*262144 + (ord($c{3})-128)*4096 + (ord($c{4})-128)*64 + (ord($c{5})-128); |
|
647 |
if (ord($c{0}) >= 254 && ord($c{0}) <= 255) $ud = false; // error |
|
648 |
return $ud; |
|
649 |
} |
|
650 |
// support-function for mb_convert_encoding_wrapper() |
|
651 | 645 |
function code_to_utf8($num) { |
652 | 646 |
if ($num <= 0x7F) { |
653 | 647 |
return chr($num); |
... | ... | |
706 | 700 |
} |
707 | 701 |
|
708 | 702 |
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts |
709 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
|
703 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET) { |
|
710 | 704 |
$charset_out = strtoupper($charset_out); |
711 | 705 |
if ($charset_out == '') { $charset_out = 'ISO-8859-1'; } |
712 | 706 |
$charset_in = strtoupper(DEFAULT_CHARSET); |
713 |
|
|
714 |
// string to utf-8 |
|
715 |
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') { |
|
707 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
708 |
global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8; |
|
709 |
global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11; |
|
710 |
|
|
711 |
// string to utf-8, entities_to_utf8 |
|
712 |
if (substr($charset_in,0,8) == 'ISO-8859' || $charset_in == 'UTF-8') { |
|
716 | 713 |
if ($charset_in == 'ISO-8859-1') { |
717 | 714 |
$string=utf8_encode($string); |
715 |
} elseif ($charset_in == 'ISO-8859-2') { |
|
716 |
$string = strtr($string, $iso_8859_2_to_utf8); |
|
717 |
} elseif ($charset_in == 'ISO-8859-3') { |
|
718 |
$string = strtr($string, $iso_8859_3_to_utf8); |
|
719 |
} elseif ($charset_in == 'ISO-8859-4') { |
|
720 |
$string = strtr($string, $iso_8859_4_to_utf8); |
|
721 |
} elseif ($charset_in == 'ISO-8859-5') { |
|
722 |
$string = strtr($string, $iso_8859_5_to_utf8); |
|
723 |
} elseif ($charset_in == 'ISO-8859-6') { |
|
724 |
$string = strtr($string, $iso_8859_6_to_utf8); |
|
725 |
} elseif ($charset_in == 'ISO-8859-7') { |
|
726 |
$string = strtr($string, $iso_8859_7_to_utf8); |
|
727 |
} elseif ($charset_in == 'ISO-8859-8') { |
|
728 |
$string = strtr($string, $iso_8859_8_to_utf8); |
|
729 |
} elseif ($charset_in == 'ISO-8859-9') { |
|
730 |
$string = strtr($string, $iso_8859_9_to_utf8); |
|
731 |
} elseif ($charset_in == 'ISO-8859-10') { |
|
732 |
$string = strtr($string, $iso_8859_10_to_utf8); |
|
733 |
} elseif ($charset_in == 'ISO-8859-11') { |
|
734 |
$string = strtr($string, $iso_8859_11_to_utf8); |
|
718 | 735 |
} |
719 | 736 |
// decode html-entities |
720 | 737 |
if(preg_match("/&[#a-zA-Z0-9]+;/", $string)) { |
721 | 738 |
$string=string_decode_encode_entities($string, 'UTF-8', 'HTML-ENTITIES'); |
722 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
739 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); // alternative to string_decode_encode_entities()
|
|
723 | 740 |
//$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
724 | 741 |
} |
725 | 742 |
} |
... | ... | |
729 | 746 |
// string to $charset_out |
730 | 747 |
if($charset_out == 'ISO-8859-1') { |
731 | 748 |
$string=utf8_decode($string); |
749 |
} elseif($charset_out == 'ISO-8859-2') { |
|
750 |
$string = strtr($string, $utf8_to_iso_8859_2); |
|
751 |
} elseif($charset_out == 'ISO-8859-3') { |
|
752 |
$string = strtr($string, $utf8_to_iso_8859_3); |
|
753 |
} elseif($charset_out == 'ISO-8859-4') { |
|
754 |
$string = strtr($string, $utf8_to_iso_8859_4); |
|
755 |
} elseif($charset_out == 'ISO-8859-5') { |
|
756 |
$string = strtr($string, $utf8_to_iso_8859_5); |
|
757 |
} elseif($charset_out == 'ISO-8859-6') { |
|
758 |
$string = strtr($string, $utf8_to_iso_8859_6); |
|
759 |
} elseif($charset_out == 'ISO-8859-7') { |
|
760 |
$string = strtr($string, $utf8_to_iso_8859_7); |
|
761 |
} elseif($charset_out == 'ISO-8859-8') { |
|
762 |
$string = strtr($string, $utf8_to_iso_8859_8); |
|
763 |
} elseif($charset_out == 'ISO-8859-9') { |
|
764 |
$string = strtr($string, $utf8_to_iso_8859_9); |
|
765 |
} elseif($charset_out == 'ISO-8859-10') { |
|
766 |
$string = strtr($string, $utf8_to_iso_8859_10); |
|
767 |
} elseif($charset_out == 'ISO-8859-11') { |
|
768 |
$string = strtr($string, $utf8_to_iso_8859_11); |
|
769 |
} elseif($charset_out != 'UTF-8') { |
|
770 |
if(is_UTF8($string)) { |
|
771 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
772 |
} |
|
732 | 773 |
} |
733 |
elseif($charset_out != 'UTF-8' && is_UTF8($string)) { |
|
734 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
735 |
} |
|
736 | 774 |
return $string; |
737 | 775 |
} |
738 | 776 |
|
739 | 777 |
// Function to convert a string from mixed html-entitites/$charset_in-umlauts to pure html-entities |
740 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
|
778 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET) { |
|
741 | 779 |
$charset_in = strtoupper($charset_in); |
742 | 780 |
if ($charset_in == "") { $charset_in = 'ISO-8859-1'; } |
781 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
782 |
global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8; |
|
743 | 783 |
|
744 |
// string to utf-8 |
|
745 |
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') {
|
|
784 |
// string to utf-8, umlauts_to_entities
|
|
785 |
if ($charset_in == 'UTF-8' || substr($charset_in,0,8) == 'ISO-8859') {
|
|
746 | 786 |
if ($charset_in == 'ISO-8859-1') { |
747 | 787 |
$string=utf8_encode($string); |
788 |
} elseif ($charset_in == 'ISO-8859-2') { |
|
789 |
$string = strtr($string, $iso_8859_2_to_utf8); |
|
790 |
} elseif ($charset_in == 'ISO-8859-3') { |
|
791 |
$string = strtr($string, $iso_8859_3_to_utf8); |
|
792 |
} elseif ($charset_in == 'ISO-8859-4') { |
|
793 |
$string = strtr($string, $iso_8859_4_to_utf8); |
|
794 |
} elseif ($charset_in == 'ISO-8859-5') { |
|
795 |
$string = strtr($string, $iso_8859_5_to_utf8); |
|
796 |
} elseif ($charset_in == 'ISO-8859-6') { |
|
797 |
$string = strtr($string, $iso_8859_6_to_utf8); |
|
798 |
} elseif ($charset_in == 'ISO-8859-7') { |
|
799 |
$string = strtr($string, $iso_8859_7_to_utf8); |
|
800 |
} elseif ($charset_in == 'ISO-8859-8') { |
|
801 |
$string = strtr($string, $iso_8859_8_to_utf8); |
|
802 |
} elseif ($charset_in == 'ISO-8859-9') { |
|
803 |
$string = strtr($string, $iso_8859_9_to_utf8); |
|
804 |
} elseif ($charset_in == 'ISO-8859-10') { |
|
805 |
$string = strtr($string, $iso_8859_10_to_utf8); |
|
806 |
} elseif ($charset_in == 'ISO-8859-11') { |
|
807 |
$string = strtr($string, $iso_8859_11_to_utf8); |
|
748 | 808 |
} |
749 | 809 |
// encode html-entities |
750 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); // this is very slow!
|
|
810 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); |
|
751 | 811 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
752 | 812 |
} |
753 | 813 |
else { |
... | ... | |
764 | 824 |
function umlauts_to_defcharset($string, $charset) { |
765 | 825 |
$charset_out = strtoupper(DEFAULT_CHARSET); |
766 | 826 |
if ($charset_out == "") { $charset_out = 'ISO-8859-1'; } |
827 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
828 |
global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11; |
|
767 | 829 |
|
768 | 830 |
if($charset_out == $charset) { |
769 | 831 |
return $string; |
770 | 832 |
} |
771 |
if($charset_out == 'ISO-8859-1' && $charset == 'UTF-8') { |
|
772 |
$string = utf8_decode($string); |
|
833 |
|
|
834 |
if($charset == 'UTF-8') { |
|
835 |
if($charset_out == 'ISO-8859-1') { |
|
836 |
$string = utf8_decode($string); |
|
837 |
} elseif ($charset_out == 'ISO-8859-2') { |
|
838 |
$string = strtr($string, $utf8_to_iso_8859_2); |
|
839 |
} elseif ($charset_out == 'ISO-8859-3') { |
|
840 |
$string = strtr($string, $utf8_to_iso_8859_3); |
|
841 |
} elseif ($charset_out == 'ISO-8859-4') { |
|
842 |
$string = strtr($string, $utf8_to_iso_8859_4); |
|
843 |
} elseif ($charset_out == 'ISO-8859-5') { |
|
844 |
$string = strtr($string, $utf8_to_iso_8859_5); |
|
845 |
} elseif ($charset_out == 'ISO-8859-6') { |
|
846 |
$string = strtr($string, $utf8_to_iso_8859_6); |
|
847 |
} elseif ($charset_out == 'ISO-8859-7') { |
|
848 |
$string = strtr($string, $utf8_to_iso_8859_7); |
|
849 |
} elseif ($charset_out == 'ISO-8859-8') { |
|
850 |
$string = strtr($string, $utf8_to_iso_8859_8); |
|
851 |
} elseif ($charset_out == 'ISO-8859-9') { |
|
852 |
$string = strtr($string, $utf8_to_iso_8859_9); |
|
853 |
} elseif ($charset_out == 'ISO-8859-10') { |
|
854 |
$string = strtr($string, $utf8_to_iso_8859_10); |
|
855 |
} elseif ($charset_out == 'ISO-8859-11') { |
|
856 |
$string = strtr($string, $utf8_to_iso_8859_11); |
|
857 |
} |
|
858 |
else { |
|
859 |
$string=mb_convert_encoding_wrapper($string, $charset_out, $charset); |
|
860 |
} |
|
773 | 861 |
} |
774 | 862 |
else { |
775 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
|
|
863 |
$string=mb_convert_encoding_wrapper($string, $charset_out, $charset);
|
|
776 | 864 |
} |
777 | 865 |
|
778 | 866 |
return $string; |
Also available in: Unified diff
Fixed a conversion issue with some ISO charsets and sped up conversion of large pages (thanks to Thorn)