Revision 1748
Added by darkviper almost 12 years ago
idna_convert.class.php | ||
---|---|---|
36 | 36 |
* simple strings and complete email addresses as well. That means, that you might |
37 | 37 |
* use any of the following notations: |
38 | 38 |
* |
39 |
* - www.n?rgler.com
|
|
39 |
* - www.nörgler.com
|
|
40 | 40 |
* - xn--nrgler-wxa |
41 | 41 |
* - xn--brse-5qa.xn--knrz-1ra.info |
42 | 42 |
* |
... | ... | |
47 | 47 |
* ACE input and output is always expected to be ASCII. |
48 | 48 |
* |
49 | 49 |
* @author Matthias Sommerfeld <mso@phlylabs.de> |
50 |
* @author Leonid Kogan <lko@neuse.de> |
|
51 |
* @copyright 2004-2010 phlyLabs Berlin, http://phlylabs.de |
|
52 |
* @version 0.6.9 2010-11-04 |
|
50 |
* @copyright 2004-2011 phlyLabs Berlin, http://phlylabs.de |
|
51 |
* @version 0.8.0 2011-03-11 |
|
53 | 52 |
*/ |
54 | 53 |
class idna_convert |
55 | 54 |
{ |
... | ... | |
77 | 76 |
protected $_scount = 11172; // _lcount * _tcount * _vcount |
78 | 77 |
protected $_error = false; |
79 | 78 |
|
79 |
protected static $_mb_string_overload = null; |
|
80 |
|
|
80 | 81 |
// See {@link set_parameter()} for details of how to change the following |
81 | 82 |
// settings from within your script / application |
82 | 83 |
protected $_api_encoding = 'utf8'; // Default input charset is UTF-8 |
83 | 84 |
protected $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden |
84 | 85 |
protected $_strict_mode = false; // Behave strict or not |
85 |
protected $_encode_german_sz = true; // True to encode German ß; False, if not
|
|
86 |
protected $_idn_version = 2003; // Can be either 2003 (old, default) or 2008
|
|
86 | 87 |
|
87 | 88 |
/** |
88 | 89 |
* the constructor |
... | ... | |
95 | 96 |
{ |
96 | 97 |
$this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount; |
97 | 98 |
// If parameters are given, pass these to the respective method |
98 |
if (is_array($options)) return $this->set_parameter($options); |
|
99 |
if (!$this->_encode_german_sz) { |
|
100 |
$this->NP['replacemaps'][0xDF] = array(0x73, 0x73); |
|
99 |
if (is_array($options)) { |
|
100 |
$this->set_parameter($options); |
|
101 | 101 |
} |
102 |
|
|
103 |
// populate mbstring overloading cache if not set |
|
104 |
if (self::$_mb_string_overload === null) { |
|
105 |
self::$_mb_string_overload = (extension_loaded('mbstring') |
|
106 |
&& (ini_get('mbstring.func_overload') & 0x02) === 0x02); |
|
107 |
} |
|
102 | 108 |
} |
103 | 109 |
|
104 | 110 |
/** |
... | ... | |
141 | 147 |
case 'strict': |
142 | 148 |
$this->_strict_mode = ($v) ? true : false; |
143 | 149 |
break; |
144 |
case 'encode_german_sz': |
|
145 |
$this->_encode_german_sz = ($v) ? true : false; |
|
150 |
case 'idn_version': |
|
151 |
if (in_array($v, array('2003', '2008'))) { |
|
152 |
$this->_idn_version = $v; |
|
153 |
} else { |
|
154 |
$this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k); |
|
155 |
} |
|
146 | 156 |
break; |
157 |
case 'encode_german_sz': // Deprecated |
|
158 |
if (!$v) { |
|
159 |
self::$NP['replacemaps'][0xDF] = array(0x73, 0x73); |
|
160 |
} else { |
|
161 |
unset(self::$NP['replacemaps'][0xDF]); |
|
162 |
} |
|
163 |
break; |
|
147 | 164 |
default: |
148 | 165 |
$this->_error('Set Parameter: Unknown option '.$k); |
149 | 166 |
return false; |
... | ... | |
399 | 416 |
} |
400 | 417 |
// Find last occurrence of the delimiter |
401 | 418 |
$delim_pos = strrpos($encoded, '-'); |
402 |
if ($delim_pos > strlen($this->_punycode_prefix)) {
|
|
403 |
for ($k = strlen($this->_punycode_prefix); $k < $delim_pos; ++$k) {
|
|
419 |
if ($delim_pos > self::byteLength($this->_punycode_prefix)) {
|
|
420 |
for ($k = self::byteLength($this->_punycode_prefix); $k < $delim_pos; ++$k) {
|
|
404 | 421 |
$decoded[] = ord($encoded{$k}); |
405 | 422 |
} |
406 | 423 |
} |
407 | 424 |
$deco_len = count($decoded); |
408 |
$enco_len = strlen($encoded);
|
|
425 |
$enco_len = self::byteLength($encoded);
|
|
409 | 426 |
|
410 | 427 |
// Wandering through the strings; init |
411 | 428 |
$is_first = true; |
... | ... | |
443 | 460 |
protected function _encode($decoded) |
444 | 461 |
{ |
445 | 462 |
// We cannot encode a domain name containing the Punycode prefix |
446 |
$extract = strlen($this->_punycode_prefix);
|
|
463 |
$extract = self::byteLength($this->_punycode_prefix);
|
|
447 | 464 |
$check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix); |
448 | 465 |
$check_deco = array_slice($decoded, 0, $extract); |
449 | 466 |
|
... | ... | |
590 | 607 |
// While mapping required chars we apply the canonical ordering |
591 | 608 |
foreach ($input as $v) { |
592 | 609 |
// Map to nothing == skip that code point |
593 |
if (in_array($v, $this->NP['map_nothing'])) continue;
|
|
610 |
if (in_array($v, self::$NP['map_nothing'])) continue;
|
|
594 | 611 |
// Try to find prohibited input |
595 |
if (in_array($v, $this->NP['prohibit']) || in_array($v, $this->NP['general_prohibited'])) {
|
|
612 |
if (in_array($v, self::$NP['prohibit']) || in_array($v, self::$NP['general_prohibited'])) {
|
|
596 | 613 |
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v)); |
597 | 614 |
return false; |
598 | 615 |
} |
599 |
foreach ($this->NP['prohibit_ranges'] as $range) {
|
|
616 |
foreach (self::$NP['prohibit_ranges'] as $range) {
|
|
600 | 617 |
if ($range[0] <= $v && $v <= $range[1]) { |
601 | 618 |
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v)); |
602 | 619 |
return false; |
603 | 620 |
} |
604 | 621 |
} |
605 |
// Hangul syllable decomposition |
|
622 |
|
|
606 | 623 |
if (0xAC00 <= $v && $v <= 0xD7AF) { |
607 |
foreach ($this->_hangul_decompose($v) as $out) $output[] = (int) $out; |
|
608 |
// There's a decomposition mapping for that code point |
|
609 |
} elseif (isset($this->NP['replacemaps'][$v])) { |
|
610 |
foreach ($this->_apply_cannonical_ordering($this->NP['replacemaps'][$v]) as $out) { |
|
624 |
// Hangul syllable decomposition |
|
625 |
foreach ($this->_hangul_decompose($v) as $out) { |
|
611 | 626 |
$output[] = (int) $out; |
612 | 627 |
} |
628 |
} elseif (($this->_idn_version == '2003') && isset(self::$NP['replacemaps'][$v])) { |
|
629 |
// There's a decomposition mapping for that code point |
|
630 |
// Decompositions only in version 2003 (original) of IDNA |
|
631 |
foreach ($this->_apply_cannonical_ordering(self::$NP['replacemaps'][$v]) as $out) { |
|
632 |
$output[] = (int) $out; |
|
633 |
} |
|
613 | 634 |
} else { |
614 | 635 |
$output[] = (int) $v; |
615 | 636 |
} |
... | ... | |
716 | 737 |
*/ |
717 | 738 |
protected function _get_combining_class($char) |
718 | 739 |
{ |
719 |
return isset($this->NP['norm_combcls'][$char]) ? $this->NP['norm_combcls'][$char] : 0;
|
|
740 |
return isset(self::$NP['norm_combcls'][$char]) ? self::$NP['norm_combcls'][$char] : 0;
|
|
720 | 741 |
} |
721 | 742 |
|
722 | 743 |
/** |
723 |
* Apllies the cannonical ordering of a decomposed UCS4 sequence
|
|
744 |
* Applies the canonical ordering of a decomposed UCS4 sequence
|
|
724 | 745 |
* @param array Decomposed UCS4 sequence |
725 | 746 |
* @return array Ordered UCS4 sequence |
726 | 747 |
*/ |
... | ... | |
759 | 780 |
protected function _combine($input) |
760 | 781 |
{ |
761 | 782 |
$inp_len = count($input); |
762 |
foreach ($this->NP['replacemaps'] as $np_src => $np_target) {
|
|
783 |
foreach (self::$NP['replacemaps'] as $np_src => $np_target) {
|
|
763 | 784 |
if ($np_target[0] != $input[0]) continue; |
764 | 785 |
if (count($np_target) != $inp_len) continue; |
765 | 786 |
$hit = false; |
... | ... | |
798 | 819 |
{ |
799 | 820 |
$output = array(); |
800 | 821 |
$out_len = 0; |
801 |
// Patch by Daniel Hahler; work around prolbem with mbstring.func_overload |
|
802 |
if (function_exists('mb_strlen')) { |
|
803 |
$inp_len = mb_strlen($input, '8bit'); |
|
804 |
} else { |
|
805 |
$inp_len = strlen($input); |
|
806 |
} |
|
822 |
$inp_len = self::byteLength($input); |
|
807 | 823 |
$mode = 'next'; |
808 | 824 |
$test = 'none'; |
809 | 825 |
for ($k = 0; $k < $inp_len; ++$k) { |
... | ... | |
924 | 940 |
protected function _ucs4_string_to_ucs4($input) |
925 | 941 |
{ |
926 | 942 |
$output = array(); |
927 |
$inp_len = strlen($input);
|
|
943 |
$inp_len = self::byteLength($input);
|
|
928 | 944 |
// Input length must be divisible by 4 |
929 | 945 |
if ($inp_len % 4) { |
930 | 946 |
$this->_error('Input UCS4 string is broken'); |
... | ... | |
944 | 960 |
} |
945 | 961 |
|
946 | 962 |
/** |
963 |
* Gets the length of a string in bytes even if mbstring function |
|
964 |
* overloading is turned on |
|
965 |
* |
|
966 |
* @param string $string the string for which to get the length. |
|
967 |
* @return integer the length of the string in bytes. |
|
968 |
*/ |
|
969 |
protected static function byteLength($string) |
|
970 |
{ |
|
971 |
if (self::$_mb_string_overload) { |
|
972 |
return mb_strlen($string, '8bit'); |
|
973 |
} |
|
974 |
return strlen((binary) $string); |
|
975 |
} |
|
976 |
|
|
977 |
/** |
|
978 |
* Attempts to return a concrete IDNA instance. |
|
979 |
* |
|
980 |
* @param array $params Set of parameters |
|
981 |
* @return idna_convert |
|
982 |
* @access public |
|
983 |
*/ |
|
984 |
public function getInstance($params = array()) |
|
985 |
{ |
|
986 |
return new idna_convert($params); |
|
987 |
} |
|
988 |
|
|
989 |
/** |
|
990 |
* Attempts to return a concrete IDNA instance for either php4 or php5, |
|
991 |
* only creating a new instance if no IDNA instance with the same |
|
992 |
* parameters currently exists. |
|
993 |
* |
|
994 |
* @param array $params Set of parameters |
|
995 |
* |
|
996 |
* @return object idna_convert |
|
997 |
* @access public |
|
998 |
*/ |
|
999 |
public function singleton($params = array()) |
|
1000 |
{ |
|
1001 |
static $instances; |
|
1002 |
if (!isset($instances)) { |
|
1003 |
$instances = array(); |
|
1004 |
} |
|
1005 |
$signature = serialize($params); |
|
1006 |
if (!isset($instances[$signature])) { |
|
1007 |
$instances[$signature] = idna_convert::getInstance($params); |
|
1008 |
} |
|
1009 |
return $instances[$signature]; |
|
1010 |
} |
|
1011 |
|
|
1012 |
/** |
|
947 | 1013 |
* Holds all relevant mapping tables |
948 | 1014 |
* See RFC3454 for details |
949 | 1015 |
* |
950 | 1016 |
* @private array |
951 | 1017 |
* @since 0.5.2 |
952 | 1018 |
*/ |
953 |
protected $NP = array |
|
1019 |
protected static $NP = array
|
|
954 | 1020 |
('map_nothing' => array(0xAD, 0x34F, 0x1806, 0x180B, 0x180C, 0x180D, 0x200B, 0x200C |
955 | 1021 |
,0x200D, 0x2060, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07 |
956 | 1022 |
,0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFEFF |
... | ... | |
985 | 1051 |
,0xD0 => array(0xF0), 0xD1 => array(0xF1), 0xD2 => array(0xF2), 0xD3 => array(0xF3) |
986 | 1052 |
,0xD4 => array(0xF4), 0xD5 => array(0xF5), 0xD6 => array(0xF6), 0xD8 => array(0xF8) |
987 | 1053 |
,0xD9 => array(0xF9), 0xDA => array(0xFA), 0xDB => array(0xFB), 0xDC => array(0xFC) |
988 |
,0xDD => array(0xFD), 0xDE => array(0xFE) /* Here was German "ß" -> "ss", is now configurable */
|
|
1054 |
,0xDD => array(0xFD), 0xDE => array(0xFE), 0xDF => array(0x73, 0x73)
|
|
989 | 1055 |
,0x100 => array(0x101), 0x102 => array(0x103), 0x104 => array(0x105) |
990 | 1056 |
,0x106 => array(0x107), 0x108 => array(0x109), 0x10A => array(0x10B) |
991 | 1057 |
,0x10C => array(0x10D), 0x10E => array(0x10F), 0x110 => array(0x111) |
Also available in: Unified diff
3rd-party class idna_convert updated to version 0.8.0