338 |
338 |
return $subs;
|
339 |
339 |
}
|
340 |
340 |
|
341 |
|
// Function as replecement for php's htmlspecialchars()
|
|
341 |
// Function as replacement for php's htmlspecialchars()
|
342 |
342 |
function my_htmlspecialchars($string) {
|
343 |
343 |
$string = preg_replace("/&(?=[#a-z0-9]+;)/i", "_x_", $string);
|
344 |
344 |
$string = strtr($string, array("<"=>"<", ">"=>">", "&"=>"&", "\""=>""", "\'"=>"'"));
|
... | ... | |
613 |
613 |
);
|
614 |
614 |
|
615 |
615 |
if ($in == 'HTML-ENTITIES') {
|
616 |
|
$string = strtr($string, array('''=>''')); // fix a broken entity
|
617 |
616 |
$string = strtr($string, $named_to_numbered_entities);
|
618 |
617 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
619 |
618 |
}
|
620 |
619 |
elseif ($out == 'HTML-ENTITIES') {
|
621 |
|
//$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
622 |
620 |
$char = "";
|
623 |
|
while (strlen($string) > 0) {
|
624 |
|
preg_match("/^(.)(.*)$/su", $string, $match);
|
625 |
|
if (strlen($match[1]) > 1) {
|
626 |
|
$char .= "&#".uniord($match[1]).";";
|
627 |
|
} else $char .= $match[1];
|
628 |
|
$string = $match[2];
|
629 |
|
}
|
|
621 |
$i=0;
|
|
622 |
$len=strlen($string);
|
|
623 |
if($len==0) return $string;
|
|
624 |
do {
|
|
625 |
if(ord($string{$i}) <= 127) $ud = $string{$i++};
|
|
626 |
elseif(ord($string{$i}) <= 223) $ud = (ord($string{$i++})-192)*64 + (ord($string{$i++})-128);
|
|
627 |
elseif(ord($string{$i}) <= 239) $ud = (ord($string{$i++})-224)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128);
|
|
628 |
elseif(ord($string{$i}) <= 247) $ud = (ord($string{$i++})-240)*262144 + (ord($string{$i++})-128)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128);
|
|
629 |
elseif(ord($string{$i}) <= 251) $ud = ord($string{$i++}); // error!
|
|
630 |
if($ud > 127) {
|
|
631 |
$char .= "&#$ud;";
|
|
632 |
} else {
|
|
633 |
$char .= $ud;
|
|
634 |
}
|
|
635 |
} while($i < $len);
|
630 |
636 |
$string = $char;
|
631 |
637 |
$string = strtr($string, $numbered_to_named_entities);
|
632 |
638 |
// do ' and "
|
... | ... | |
636 |
642 |
}
|
637 |
643 |
|
638 |
644 |
// support-function for string_decode_encode_entities()
|
639 |
|
/**
 * Decode the first UTF-8 encoded character of a string into its code point.
 *
 * Recognises the classic 1- to 6-byte UTF-8 lead-byte ranges; any bytes
 * following the first character are ignored.
 *
 * @param string $c String whose first UTF-8 character should be decoded.
 * @return int|false The code point. 0 is returned for an empty string or when
 *                   the first byte is a stray continuation byte (128-191).
 *                   false is returned for the invalid lead bytes 254/255 and
 *                   for sequences that are shorter than their lead byte
 *                   announces.
 */
function uniord($c) {
	$c = (string) $c;
	if ($c === '') {
		return 0;
	}

	// Use [] offsets: the {} string-offset syntax no longer parses on PHP 8.
	$lead = ord($c[0]);

	if ($lead <= 127) {
		return $lead; // plain ASCII
	}
	if ($lead <= 191) {
		return 0; // continuation byte cannot start a character
	}
	if ($lead >= 254) {
		return false; // never valid in UTF-8
	}

	// Strip the length marker from the lead byte and note how many
	// continuation bytes have to follow it.
	if ($lead <= 223) {
		$ud = $lead - 192;
		$extra = 1;
	} elseif ($lead <= 239) {
		$ud = $lead - 224;
		$extra = 2;
	} elseif ($lead <= 247) {
		$ud = $lead - 240;
		$extra = 3;
	} elseif ($lead <= 251) {
		$ud = $lead - 248;
		$extra = 4;
	} else {
		$ud = $lead - 252;
		$extra = 5;
	}

	// Truncated sequence: report an error instead of reading past the end.
	if (strlen($c) <= $extra) {
		return false;
	}

	// Every continuation byte contributes its low 6 bits.
	for ($i = 1; $i <= $extra; $i++) {
		$ud = $ud * 64 + (ord($c[$i]) - 128);
	}

	return $ud;
}
|
650 |
|
// support-function for mb_convert_encoding_wrapper()
|
651 |
645 |
function code_to_utf8($num) {
|
652 |
646 |
if ($num <= 0x7F) {
|
653 |
647 |
return chr($num);
|
... | ... | |
706 |
700 |
}
|
707 |
701 |
|
708 |
702 |
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts
|
709 |
|
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
|
703 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET) {
|
710 |
704 |
$charset_out = strtoupper($charset_out);
|
711 |
705 |
if ($charset_out == '') { $charset_out = 'ISO-8859-1'; }
|
712 |
706 |
$charset_in = strtoupper(DEFAULT_CHARSET);
|
713 |
|
|
714 |
|
// string to utf-8
|
715 |
|
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') {
|
|
707 |
require_once(WB_PATH.'/framework/charsets_table.php');
|
|
708 |
global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8;
|
|
709 |
global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11;
|
|
710 |
|
|
711 |
// string to utf-8, entities_to_utf8
|
|
712 |
if (substr($charset_in,0,8) == 'ISO-8859' || $charset_in == 'UTF-8') {
|
716 |
713 |
if ($charset_in == 'ISO-8859-1') {
|
717 |
714 |
$string=utf8_encode($string);
|
|
715 |
} elseif ($charset_in == 'ISO-8859-2') {
|
|
716 |
$string = strtr($string, $iso_8859_2_to_utf8);
|
|
717 |
} elseif ($charset_in == 'ISO-8859-3') {
|
|
718 |
$string = strtr($string, $iso_8859_3_to_utf8);
|
|
719 |
} elseif ($charset_in == 'ISO-8859-4') {
|
|
720 |
$string = strtr($string, $iso_8859_4_to_utf8);
|
|
721 |
} elseif ($charset_in == 'ISO-8859-5') {
|
|
722 |
$string = strtr($string, $iso_8859_5_to_utf8);
|
|
723 |
} elseif ($charset_in == 'ISO-8859-6') {
|
|
724 |
$string = strtr($string, $iso_8859_6_to_utf8);
|
|
725 |
} elseif ($charset_in == 'ISO-8859-7') {
|
|
726 |
$string = strtr($string, $iso_8859_7_to_utf8);
|
|
727 |
} elseif ($charset_in == 'ISO-8859-8') {
|
|
728 |
$string = strtr($string, $iso_8859_8_to_utf8);
|
|
729 |
} elseif ($charset_in == 'ISO-8859-9') {
|
|
730 |
$string = strtr($string, $iso_8859_9_to_utf8);
|
|
731 |
} elseif ($charset_in == 'ISO-8859-10') {
|
|
732 |
$string = strtr($string, $iso_8859_10_to_utf8);
|
|
733 |
} elseif ($charset_in == 'ISO-8859-11') {
|
|
734 |
$string = strtr($string, $iso_8859_11_to_utf8);
|
718 |
735 |
}
|
719 |
736 |
// decode html-entities
|
720 |
737 |
if(preg_match("/&[#a-zA-Z0-9]+;/", $string)) {
|
721 |
738 |
$string=string_decode_encode_entities($string, 'UTF-8', 'HTML-ENTITIES');
|
722 |
|
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
|
|
739 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); // alternative to string_decode_encode_entities()
|
723 |
740 |
//$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
|
724 |
741 |
}
|
725 |
742 |
}
|
... | ... | |
729 |
746 |
// string to $charset_out
|
730 |
747 |
if($charset_out == 'ISO-8859-1') {
|
731 |
748 |
$string=utf8_decode($string);
|
|
749 |
} elseif($charset_out == 'ISO-8859-2') {
|
|
750 |
$string = strtr($string, $utf8_to_iso_8859_2);
|
|
751 |
} elseif($charset_out == 'ISO-8859-3') {
|
|
752 |
$string = strtr($string, $utf8_to_iso_8859_3);
|
|
753 |
} elseif($charset_out == 'ISO-8859-4') {
|
|
754 |
$string = strtr($string, $utf8_to_iso_8859_4);
|
|
755 |
} elseif($charset_out == 'ISO-8859-5') {
|
|
756 |
$string = strtr($string, $utf8_to_iso_8859_5);
|
|
757 |
} elseif($charset_out == 'ISO-8859-6') {
|
|
758 |
$string = strtr($string, $utf8_to_iso_8859_6);
|
|
759 |
} elseif($charset_out == 'ISO-8859-7') {
|
|
760 |
$string = strtr($string, $utf8_to_iso_8859_7);
|
|
761 |
} elseif($charset_out == 'ISO-8859-8') {
|
|
762 |
$string = strtr($string, $utf8_to_iso_8859_8);
|
|
763 |
} elseif($charset_out == 'ISO-8859-9') {
|
|
764 |
$string = strtr($string, $utf8_to_iso_8859_9);
|
|
765 |
} elseif($charset_out == 'ISO-8859-10') {
|
|
766 |
$string = strtr($string, $utf8_to_iso_8859_10);
|
|
767 |
} elseif($charset_out == 'ISO-8859-11') {
|
|
768 |
$string = strtr($string, $utf8_to_iso_8859_11);
|
|
769 |
} elseif($charset_out != 'UTF-8') {
|
|
770 |
if(is_UTF8($string)) {
|
|
771 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
|
|
772 |
}
|
732 |
773 |
}
|
733 |
|
elseif($charset_out != 'UTF-8' && is_UTF8($string)) {
|
734 |
|
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
|
735 |
|
}
|
736 |
774 |
return $string;
|
737 |
775 |
}
|
738 |
776 |
|
739 |
777 |
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities
|
740 |
|
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
|
778 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET) {
|
741 |
779 |
$charset_in = strtoupper($charset_in);
|
742 |
780 |
if ($charset_in == "") { $charset_in = 'ISO-8859-1'; }
|
|
781 |
require_once(WB_PATH.'/framework/charsets_table.php');
|
|
782 |
global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8;
|
743 |
783 |
|
744 |
|
// string to utf-8
|
745 |
|
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') {
|
|
784 |
// string to utf-8, umlauts_to_entities
|
|
785 |
if ($charset_in == 'UTF-8' || substr($charset_in,0,8) == 'ISO-8859') {
|
746 |
786 |
if ($charset_in == 'ISO-8859-1') {
|
747 |
787 |
$string=utf8_encode($string);
|
|
788 |
} elseif ($charset_in == 'ISO-8859-2') {
|
|
789 |
$string = strtr($string, $iso_8859_2_to_utf8);
|
|
790 |
} elseif ($charset_in == 'ISO-8859-3') {
|
|
791 |
$string = strtr($string, $iso_8859_3_to_utf8);
|
|
792 |
} elseif ($charset_in == 'ISO-8859-4') {
|
|
793 |
$string = strtr($string, $iso_8859_4_to_utf8);
|
|
794 |
} elseif ($charset_in == 'ISO-8859-5') {
|
|
795 |
$string = strtr($string, $iso_8859_5_to_utf8);
|
|
796 |
} elseif ($charset_in == 'ISO-8859-6') {
|
|
797 |
$string = strtr($string, $iso_8859_6_to_utf8);
|
|
798 |
} elseif ($charset_in == 'ISO-8859-7') {
|
|
799 |
$string = strtr($string, $iso_8859_7_to_utf8);
|
|
800 |
} elseif ($charset_in == 'ISO-8859-8') {
|
|
801 |
$string = strtr($string, $iso_8859_8_to_utf8);
|
|
802 |
} elseif ($charset_in == 'ISO-8859-9') {
|
|
803 |
$string = strtr($string, $iso_8859_9_to_utf8);
|
|
804 |
} elseif ($charset_in == 'ISO-8859-10') {
|
|
805 |
$string = strtr($string, $iso_8859_10_to_utf8);
|
|
806 |
} elseif ($charset_in == 'ISO-8859-11') {
|
|
807 |
$string = strtr($string, $iso_8859_11_to_utf8);
|
748 |
808 |
}
|
749 |
809 |
// encode html-entities
|
750 |
|
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); // this is very slow!
|
|
810 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8');
|
751 |
811 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
|
752 |
812 |
}
|
753 |
813 |
else {
|
... | ... | |
764 |
824 |
function umlauts_to_defcharset($string, $charset) {
|
765 |
825 |
$charset_out = strtoupper(DEFAULT_CHARSET);
|
766 |
826 |
if ($charset_out == "") { $charset_out = 'ISO-8859-1'; }
|
|
827 |
require_once(WB_PATH.'/framework/charsets_table.php');
|
|
828 |
global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11;
|
767 |
829 |
|
768 |
830 |
if($charset_out == $charset) {
|
769 |
831 |
return $string;
|
770 |
832 |
}
|
771 |
|
if($charset_out == 'ISO-8859-1' && $charset == 'UTF-8') {
|
772 |
|
$string = utf8_decode($string);
|
|
833 |
|
|
834 |
if($charset == 'UTF-8') {
|
|
835 |
if($charset_out == 'ISO-8859-1') {
|
|
836 |
$string = utf8_decode($string);
|
|
837 |
} elseif ($charset_out == 'ISO-8859-2') {
|
|
838 |
$string = strtr($string, $utf8_to_iso_8859_2);
|
|
839 |
} elseif ($charset_out == 'ISO-8859-3') {
|
|
840 |
$string = strtr($string, $utf8_to_iso_8859_3);
|
|
841 |
} elseif ($charset_out == 'ISO-8859-4') {
|
|
842 |
$string = strtr($string, $utf8_to_iso_8859_4);
|
|
843 |
} elseif ($charset_out == 'ISO-8859-5') {
|
|
844 |
$string = strtr($string, $utf8_to_iso_8859_5);
|
|
845 |
} elseif ($charset_out == 'ISO-8859-6') {
|
|
846 |
$string = strtr($string, $utf8_to_iso_8859_6);
|
|
847 |
} elseif ($charset_out == 'ISO-8859-7') {
|
|
848 |
$string = strtr($string, $utf8_to_iso_8859_7);
|
|
849 |
} elseif ($charset_out == 'ISO-8859-8') {
|
|
850 |
$string = strtr($string, $utf8_to_iso_8859_8);
|
|
851 |
} elseif ($charset_out == 'ISO-8859-9') {
|
|
852 |
$string = strtr($string, $utf8_to_iso_8859_9);
|
|
853 |
} elseif ($charset_out == 'ISO-8859-10') {
|
|
854 |
$string = strtr($string, $utf8_to_iso_8859_10);
|
|
855 |
} elseif ($charset_out == 'ISO-8859-11') {
|
|
856 |
$string = strtr($string, $utf8_to_iso_8859_11);
|
|
857 |
}
|
|
858 |
else {
|
|
859 |
$string=mb_convert_encoding_wrapper($string, $charset_out, $charset);
|
|
860 |
}
|
773 |
861 |
}
|
774 |
862 |
else {
|
775 |
|
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
|
|
863 |
$string=mb_convert_encoding_wrapper($string, $charset_out, $charset);
|
776 |
864 |
}
|
777 |
865 |
|
778 |
866 |
return $string;
|
Fixed a conversion issue with some ISO charsets and sped up conversion of large pages (thanks to Thorn)