Project

General

Profile

« Previous | Next » 

Revision 1748

Added by darkviper almost 12 years ago

3rd-party class idna_convert updated to version 0.8.0

View differences:

idna_convert.class.php
36 36
 * simple strings and complete email addresses as well. That means, that you might
37 37
 * use any of the following notations:
38 38
 *
39
 * - www.nörgler.com
39
 * - www.nörgler.com
40 40
 * - xn--nrgler-wxa
41 41
 * - xn--brse-5qa.xn--knrz-1ra.info
42 42
 *
......
47 47
 * ACE input and output is always expected to be ASCII.
48 48
 *
49 49
 * @author  Matthias Sommerfeld <mso@phlylabs.de>
50
 * @author  Leonid Kogan <lko@neuse.de>
51
 * @copyright 2004-2010 phlyLabs Berlin, http://phlylabs.de
52
 * @version 0.6.9 2010-11-04
50
 * @copyright 2004-2011 phlyLabs Berlin, http://phlylabs.de
51
 * @version 0.8.0 2011-03-11
53 52
 */
54 53
class idna_convert
55 54
{
......
77 76
    protected $_scount = 11172; // _lcount * _tcount * _vcount
78 77
    protected $_error = false;
79 78

  
79
    protected static $_mb_string_overload = null;
80

  
80 81
    // See {@link set_parameter()} for details of how to change the following
81 82
    // settings from within your script / application
82 83
    protected $_api_encoding = 'utf8';   // Default input charset is UTF-8
83 84
    protected $_allow_overlong = false;  // Overlong UTF-8 encodings are forbidden
84 85
    protected $_strict_mode = false;     // Behave strict or not
85
    protected $_encode_german_sz = true; // True to encode German ß; False, if not
86
    protected $_idn_version = 2003;      // Can be either 2003 (old, default) or 2008
86 87

  
87 88
    /**
88 89
     * the constructor
......
95 96
    {
96 97
        $this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;
97 98
        // If parameters are given, pass these to the respective method
98
        if (is_array($options)) return $this->set_parameter($options);
99
        if (!$this->_encode_german_sz) {
100
            $this->NP['replacemaps'][0xDF] = array(0x73, 0x73);
99
        if (is_array($options)) {
100
            $this->set_parameter($options);
101 101
        }
102

  
103
        // populate mbstring overloading cache if not set
104
        if (self::$_mb_string_overload === null) {
105
            self::$_mb_string_overload = (extension_loaded('mbstring')
106
                && (ini_get('mbstring.func_overload') & 0x02) === 0x02);
107
        }
102 108
    }
103 109

  
104 110
    /**
......
141 147
            case 'strict':
142 148
                $this->_strict_mode = ($v) ? true : false;
143 149
                break;
144
            case 'encode_german_sz':
145
                $this->_encode_german_sz = ($v) ? true : false;
150
            case 'idn_version':
151
                if (in_array($v, array('2003', '2008'))) {
152
                    $this->_idn_version = $v;
153
                } else {
154
                    $this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
155
                }
146 156
                break;
157
            case 'encode_german_sz': // Deprecated
158
                if (!$v) {
159
                    self::$NP['replacemaps'][0xDF] = array(0x73, 0x73);
160
                } else {
161
                    unset(self::$NP['replacemaps'][0xDF]);
162
                }
163
                break;
147 164
            default:
148 165
                $this->_error('Set Parameter: Unknown option '.$k);
149 166
                return false;
......
399 416
        }
400 417
        // Find last occurrence of the delimiter
401 418
        $delim_pos = strrpos($encoded, '-');
402
        if ($delim_pos > strlen($this->_punycode_prefix)) {
403
            for ($k = strlen($this->_punycode_prefix); $k < $delim_pos; ++$k) {
419
        if ($delim_pos > self::byteLength($this->_punycode_prefix)) {
420
            for ($k = self::byteLength($this->_punycode_prefix); $k < $delim_pos; ++$k) {
404 421
                $decoded[] = ord($encoded{$k});
405 422
            }
406 423
        }
407 424
        $deco_len = count($decoded);
408
        $enco_len = strlen($encoded);
425
        $enco_len = self::byteLength($encoded);
409 426

  
410 427
        // Wandering through the strings; init
411 428
        $is_first = true;
......
443 460
    protected function _encode($decoded)
444 461
    {
445 462
        // We cannot encode a domain name containing the Punycode prefix
446
        $extract = strlen($this->_punycode_prefix);
463
        $extract = self::byteLength($this->_punycode_prefix);
447 464
        $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
448 465
        $check_deco = array_slice($decoded, 0, $extract);
449 466

  
......
590 607
        // While mapping required chars we apply the cannonical ordering
591 608
        foreach ($input as $v) {
592 609
            // Map to nothing == skip that code point
593
            if (in_array($v, $this->NP['map_nothing'])) continue;
610
            if (in_array($v, self::$NP['map_nothing'])) continue;
594 611
            // Try to find prohibited input
595
            if (in_array($v, $this->NP['prohibit']) || in_array($v, $this->NP['general_prohibited'])) {
612
            if (in_array($v, self::$NP['prohibit']) || in_array($v, self::$NP['general_prohibited'])) {
596 613
                $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
597 614
                return false;
598 615
            }
599
            foreach ($this->NP['prohibit_ranges'] as $range) {
616
            foreach (self::$NP['prohibit_ranges'] as $range) {
600 617
                if ($range[0] <= $v && $v <= $range[1]) {
601 618
                    $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
602 619
                    return false;
603 620
                }
604 621
            }
605
            // Hangul syllable decomposition
622

  
606 623
            if (0xAC00 <= $v && $v <= 0xD7AF) {
607
                foreach ($this->_hangul_decompose($v) as $out) $output[] = (int) $out;
608
            // There's a decomposition mapping for that code point
609
            } elseif (isset($this->NP['replacemaps'][$v])) {
610
                foreach ($this->_apply_cannonical_ordering($this->NP['replacemaps'][$v]) as $out) {
624
                // Hangul syllable decomposition
625
                foreach ($this->_hangul_decompose($v) as $out) {
611 626
                    $output[] = (int) $out;
612 627
                }
628
            } elseif (($this->_idn_version == '2003') && isset(self::$NP['replacemaps'][$v])) {
629
                // There's a decomposition mapping for that code point
630
                // Decompositions only in version 2003 (original) of IDNA
631
                foreach ($this->_apply_cannonical_ordering(self::$NP['replacemaps'][$v]) as $out) {
632
                    $output[] = (int) $out;
633
                }
613 634
            } else {
614 635
                $output[] = (int) $v;
615 636
            }
......
716 737
     */
717 738
    protected function _get_combining_class($char)
718 739
    {
719
        return isset($this->NP['norm_combcls'][$char]) ? $this->NP['norm_combcls'][$char] : 0;
740
        return isset(self::$NP['norm_combcls'][$char]) ? self::$NP['norm_combcls'][$char] : 0;
720 741
    }
721 742

  
722 743
    /**
723
     * Apllies the cannonical ordering of a decomposed UCS4 sequence
744
     * Applies the cannonical ordering of a decomposed UCS4 sequence
724 745
     * @param    array      Decomposed UCS4 sequence
725 746
     * @return   array      Ordered UCS4 sequence
726 747
     */
......
759 780
    protected function _combine($input)
760 781
    {
761 782
        $inp_len = count($input);
762
        foreach ($this->NP['replacemaps'] as $np_src => $np_target) {
783
        foreach (self::$NP['replacemaps'] as $np_src => $np_target) {
763 784
            if ($np_target[0] != $input[0]) continue;
764 785
            if (count($np_target) != $inp_len) continue;
765 786
            $hit = false;
......
798 819
    {
799 820
        $output = array();
800 821
        $out_len = 0;
801
        // Patch by Daniel Hahler; work around prolbem with mbstring.func_overload
802
        if (function_exists('mb_strlen')) {
803
            $inp_len = mb_strlen($input, '8bit');
804
        } else {
805
            $inp_len = strlen($input);
806
        }
822
        $inp_len = self::byteLength($input);
807 823
        $mode = 'next';
808 824
        $test = 'none';
809 825
        for ($k = 0; $k < $inp_len; ++$k) {
......
924 940
    protected function _ucs4_string_to_ucs4($input)
925 941
    {
926 942
        $output = array();
927
        $inp_len = strlen($input);
943
        $inp_len = self::byteLength($input);
928 944
        // Input length must be divisible by 4
929 945
        if ($inp_len % 4) {
930 946
            $this->_error('Input UCS4 string is broken');
......
944 960
    }
945 961

  
946 962
    /**
963
     * Gets the length of a string in bytes even if mbstring function
964
     * overloading is turned on
965
     *
966
     * @param string $string the string for which to get the length.
967
     * @return integer the length of the string in bytes.
968
     */
969
    protected static function byteLength($string)
970
    {
971
        if (self::$_mb_string_overload) {
972
            return mb_strlen($string, '8bit');
973
        }
974
        return strlen((binary) $string);
975
    }
976

  
977
    /**
978
     * Attempts to return a concrete IDNA instance.
979
     *
980
     * @param array $params Set of parameters
981
     * @return idna_convert
982
     * @access public
983
     */
984
    public function getInstance($params = array())
985
    {
986
        return new idna_convert($params);
987
    }
988

  
989
    /**
990
     * Attempts to return a concrete IDNA instance for either php4 or php5,
991
     * only creating a new instance if no IDNA instance with the same
992
     * parameters currently exists.
993
     *
994
     * @param array $params Set of parameters
995
     *
996
     * @return object idna_convert
997
     * @access public
998
     */
999
    public function singleton($params = array())
1000
    {
1001
        static $instances;
1002
        if (!isset($instances)) {
1003
            $instances = array();
1004
        }
1005
        $signature = serialize($params);
1006
        if (!isset($instances[$signature])) {
1007
            $instances[$signature] = idna_convert::getInstance($params);
1008
        }
1009
        return $instances[$signature];
1010
    }
1011

  
1012
    /**
947 1013
     * Holds all relevant mapping tables
948 1014
     * See RFC3454 for details
949 1015
     *
950 1016
     * @private array
951 1017
     * @since 0.5.2
952 1018
     */
953
    protected $NP = array
1019
    protected static $NP = array
954 1020
            ('map_nothing' => array(0xAD, 0x34F, 0x1806, 0x180B, 0x180C, 0x180D, 0x200B, 0x200C
955 1021
                    ,0x200D, 0x2060, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07
956 1022
                    ,0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFEFF
......
985 1051
                    ,0xD0 => array(0xF0), 0xD1 => array(0xF1), 0xD2 => array(0xF2), 0xD3 => array(0xF3)
986 1052
                    ,0xD4 => array(0xF4), 0xD5 => array(0xF5), 0xD6 => array(0xF6), 0xD8 => array(0xF8)
987 1053
                    ,0xD9 => array(0xF9), 0xDA => array(0xFA), 0xDB => array(0xFB), 0xDC => array(0xFC)
988
                    ,0xDD => array(0xFD), 0xDE => array(0xFE) /* Here was German "ß" -> "ss", is now configurable */
1054
                    ,0xDD => array(0xFD), 0xDE => array(0xFE), 0xDF => array(0x73, 0x73)
989 1055
                    ,0x100 => array(0x101), 0x102 => array(0x103), 0x104 => array(0x105)
990 1056
                    ,0x106 => array(0x107), 0x108 => array(0x109), 0x10A => array(0x10B)
991 1057
                    ,0x10C => array(0x10D), 0x10E => array(0x10F), 0x110 => array(0x111)

Also available in: Unified diff