Project

General

Profile

« Previous | Next » 

Revision 461

Added by Matthias almost 17 years ago

Added support for ISO-8859-6. WB now fully supports UTF-8 and all ISO charsets except ISO-2022-JP and ISO-2022-KR. These languages can only be used with UTF-8.

View differences:

functions.php
364 364
}
365 365

  
366 366
// Function as wrapper for mb_convert_encoding
367
// converts $charset_in to $charset_out or 
368
// UTF-8 to HTML-ENTITIES or HTML-ENTITIES to UTF-8
367 369
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
370
	// try mb_convert_encoding(). This can handle to or from HTML-ENTITIES, too
368 371
	if (function_exists('mb_convert_encoding')) {
369
		$string=mb_convert_encoding($string, $charset_out, $charset_in);
370
	} else {
371
		if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; }
372
		elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; }
373
		elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; } //?
374
		elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; }
375
		elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; }
376
		elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = 'latin1'; } //? BROKEN
377
		elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; }
378
		elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; }
379
		elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; }
380
		elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; } //?
381
		elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; }
382
		elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = 'latin1'; } //? BROKEN
383
		elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = 'latin1'; } //? BROKEN
384
		elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; }
385
		elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; }
386
		elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; }
387
		else { $mysqlcharset_from = 'latin1'; }
388

  
389
		if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; }
390
		elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; }
391
		elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; } //?
392
		elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; }
393
		elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards
394
		elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = 'latin1'; } //? BROKEN
395
		elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; }
396
		elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; }
397
		elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; }
398
		elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; } //?
399
		elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; }
400
		elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = 'latin1'; } //? BROKEN
401
		elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = 'latin1'; } //? BROKEN
402
		elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; }
403
		elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; }
404
		elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; }
405
		else { $mysqlcharset_to = 'latin1'; }
406
        	
407
		if ($charset_in == 'HTML-ENTITIES') { $mysqlcharset_from = 'html'; } // special-case
408
		if ($charset_out == 'HTML-ENTITIES') { $mysqlcharset_to = 'html'; } // special-case
409

  
410
		// use mysql to convert the string
411
		if ($mysqlcharset_from!="html" && $mysqlcharset_to!="html" && $mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) {
412
			$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to);
413
			if ($mysqlcharset_to == 'cp1251') { 
414
				$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" );
372
		// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions
373
		if ($charset_in=='ISO-8859-11' || $charset_in=='GB2312') {
374
			if (function_exists('iconv')) {
375
				$string = iconv($charset_in, 'UTF-8', $string);
415 376
			}
377
			else {
378
				if ($charset_in == 'GB2312') {
379
					$string=my_mysql_iconv($string, 'gb2312', 'utf8');
380
				} else {
381
					$string=my_mysql_iconv($string, 'tis620', 'utf8');
382
				}
383
			}
384
			$charset_in='UTF-8';
385
			if ($charset_out == 'UTF-8') {
386
				return $string;
387
			}
416 388
		}
417
		// do the utf8->htmlentities or htmlentities->utf8 translation
418
		if (($mysqlcharset_from=='html' && $mysqlcharset_to=='utf8') || ($mysqlcharset_from=='utf8' && $mysqlcharset_to=='html')) {
419
			if ($mysqlcharset_from == 'html') {
420
				$named_to_numbered_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â',
421
				'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à',
422
				'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä',
423
				'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é',
424
				'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë',
425
				'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î',
426
				'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ',
427
				'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô',
428
				'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ',
429
				'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š',
430
				'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û',
431
				'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý',
432
				'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®',
433
				'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð',
434
				'ø'=>'ø','þ'=>'þ');
435
				$string = strtr($string, $named_to_numbered_entities);
436
				$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
389
		if ($charset_out=='ISO-8859-11' || $charset_out=='GB2312') {
390
			$string=mb_convert_encoding($string, 'UTF-8', $charset_in);
391
			if (function_exists('iconv')) {
392
				$string = iconv('UTF-8', $charset_out, $string);
437 393
			}
438
			elseif ($mysqlcharset_to == 'html') {
439
				$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
440
				$char = "";
441
				while (strlen($string) > 0) {
442
					preg_match("/^(.)(.*)$/su", $string, $match);
443
					if (strlen($match[1]) > 1) {
444
						$char .= "&#".uniord($match[1]).";";
445
					} else $char .= $match[1];
446
					$string = $match[2];
394
			else {
395
				if ($charset_out == 'GB2312') {
396
					$string=my_mysql_iconv($string, 'utf8', 'gb2312');
397
				} else {
398
					$string=my_mysql_iconv($string, 'utf8', 'tis620');
447 399
				}
448
				$string = $char;
449
				$string_htmlspecialchars_decode=array("&lt;"=>"<", "&gt;"=>">", "&amp;"=>"&", "&quot;"=>"\"", "&#039;'"=>"\'");
450
				$string = strtr($string, $string_htmlspecialchars_decode);
451
				$numbered_to_named_entities=array('&#193;'=>'&Aacute;','&#225;'=>'&aacute;','&#194;'=>'&Acirc;',
452
				'&#226;'=>'&acirc;','&#198;'=>'&AElig;','&#230;'=>'&aelig;','&#192;'=>'&Agrave;','&#224;'=>'&agrave;',
453
				'&#197;'=>'&Aring;','&#229;'=>'&aring;','&#195;'=>'&Atilde;','&#227;'=>'&atilde;','&#196;'=>'&Auml;',
454
				'&#228;'=>'&auml;','&#199;'=>'&Ccedil;','&#231;'=>'&ccedil;','&#201;'=>'&Eacute;','&#233;'=>'&eacute;',
455
				'&#202;'=>'&Ecirc;','&#234;'=>'&ecirc;','&#200;'=>'&Egrave;','&#232;'=>'&egrave;','&#203;'=>'&Euml;',
456
				'&#235;'=>'&euml;','&#205;'=>'&Iacute;','&#237;'=>'&iacute;','&#206;'=>'&Icirc;','&#238;'=>'&icirc;',
457
				'&#204;'=>'&Igrave;','&#236;'=>'&igrave;','&#207;'=>'&Iuml;','&#239;'=>'&iuml;','&#209;'=>'&Ntilde;',
458
				'&#241;'=>'&ntilde;','&#211;'=>'&Oacute;','&#243;'=>'&oacute;','&#212;'=>'&Ocirc;','&#244;'=>'&ocirc;',
459
				'&#338;'=>'&OElig;','&#339;'=>'&oelig;','&#210;'=>'&Ograve;','&#242;'=>'&ograve;','&#213;'=>'&Otilde;',
460
				'&#245;'=>'&otilde;','&#214;'=>'&Ouml;','&#246;'=>'&ouml;','&#352;'=>'&Scaron;','&#353;'=>'&scaron;',
461
				'&#223;'=>'&szlig;','&#218;'=>'&Uacute;','&#250;'=>'&uacute;','&#219;'=>'&Ucirc;','&#251;'=>'&ucirc;',
462
				'&#217;'=>'&Ugrave;','&#249;'=>'&ugrave;','&#220;'=>'&Uuml;','&#252;'=>'&uuml;','&#221;'=>'&Yacute;',
463
				'&#253;'=>'&yacute;','&#376;'=>'&Yuml;','&#255;'=>'&yuml;','&#169;'=>'&copy;','&#174;'=>'&reg;',
464
				'&#208;'=>'&ETH;','&#215;'=>'&times;','&#216;'=>'&Oslash;','&#222;'=>'&THORN;','&#240;'=>'&eth;',
465
				'&#248;'=>'&oslash;','&#254;'=>'&thorn;');
466
				$string = strtr($string, $numbered_to_named_entities);
467 400
			}
401
		} else {
402
			$string=mb_convert_encoding($string, $charset_out, $charset_in);
468 403
		}
404
		return $string;
469 405
	}
470
	return($string);
406

  
407
	// try iconv(). This can't handle to or from HTML-ENTITIES.
408
	if (function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) {
409
		$string = iconv($charset_in, $charset_out, $string);
410
		return $string;
411
	}
412

  
413
	// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation
414
	if (($charset_in=='HTML-ENTITIES' && $charset_out=='UTF-8') || ($charset_in=='UTF-8' && $charset_out=='HTML-ENTITIES')) {
415
		$named_to_numbered_entities=array(
416
			'&nbsp;'=>'&#160;','&iexcl;'=>'&#161;','&cent;'=>'&#162;','&pound;'=>'&#163;','&curren;'=>'&#164;',
417
			'&yen;'=>'&#165;','&brvbar;'=>'&#166;','&sect;'=>'&#167;','&uml;'=>'&#168;','&ordf;'=>'&#170;',
418
			'&laquo;'=>'&#171;','&not;'=>'&#172;','&shy;'=>'&#173;','&reg;'=>'&#174;','&macr;'=>'&#175;',
419
			'&deg;'=>'&#176;','&plusmn;'=>'&#177;','&sup2;'=>'&#178;','&sup3;'=>'&#179;','&acute;'=>'&#180;',
420
			'&micro;'=>'&#181;','&para;'=>'&#182;','&middot;'=>'&#183;','&cedil;'=>'&#184;','&sup1;'=>'&#185;',
421
			'&ordm;'=>'&#186;','&raquo;'=>'&#187;','&frac14;'=>'&#188;','&frac12;'=>'&#189;','&frac34;'=>'&#190;',
422
			'&iquest;'=>'&#191;','&divide;'=>'&#247;','&empty;'=>'&#8709;','&euro;'=>'&#8364;',
423
			'&Aacute;'=>'&#193;','&aacute;'=>'&#225;','&Acirc;'=>'&#194;',
424
			'&acirc;'=>'&#226;','&AElig;'=>'&#198;','&aelig;'=>'&#230;','&Agrave;'=>'&#192;','&agrave;'=>'&#224;',
425
			'&Aring;'=>'&#197;','&aring;'=>'&#229;','&Atilde;'=>'&#195;','&atilde;'=>'&#227;','&Auml;'=>'&#196;',
426
			'&auml;'=>'&#228;','&Ccedil;'=>'&#199;','&ccedil;'=>'&#231;','&Eacute;'=>'&#201;','&eacute;'=>'&#233;',
427
			'&Ecirc;'=>'&#202;','&ecirc;'=>'&#234;','&Egrave;'=>'&#200;','&egrave;'=>'&#232;','&Euml;'=>'&#203;',
428
			'&euml;'=>'&#235;','&Iacute;'=>'&#205;','&iacute;'=>'&#237;','&Icirc;'=>'&#206;','&icirc;'=>'&#238;',
429
			'&Igrave;'=>'&#204;','&igrave;'=>'&#236;','&Iuml;'=>'&#207;','&iuml;'=>'&#239;','&Ntilde;'=>'&#209;',
430
			'&ntilde;'=>'&#241;','&Oacute;'=>'&#211;','&oacute;'=>'&#243;','&Ocirc;'=>'&#212;','&ocirc;'=>'&#244;',
431
			'&OElig;'=>'&#338;','&oelig;'=>'&#339;','&Ograve;'=>'&#210;','&ograve;'=>'&#242;','&Otilde;'=>'&#213;',
432
			'&otilde;'=>'&#245;','&Ouml;'=>'&#214;','&ouml;'=>'&#246;','&Scaron;'=>'&#352;','&scaron;'=>'&#353;',
433
			'&szlig;'=>'&#223;','&Uacute;'=>'&#218;','&uacute;'=>'&#250;','&Ucirc;'=>'&#219;','&ucirc;'=>'&#251;',
434
			'&Ugrave;'=>'&#217;','&ugrave;'=>'&#249;','&Uuml;'=>'&#220;','&uuml;'=>'&#252;','&Yacute;'=>'&#221;',
435
			'&yacute;'=>'&#253;','&Yuml;'=>'&#376;','&yuml;'=>'&#255;','&copy;'=>'&#169;','&reg;'=>'&#174;',
436
			'&ETH;'=>'&#208;','&times;'=>'&#215;','&Oslash;'=>'&#216;','&THORN;'=>'&#222;','&eth;'=>'&#240;',
437
			'&oslash;'=>'&#248;','&thorn;'=>'&#254;');
438
		$numbered_to_named_entities=array('&#193;'=>'&Aacute;','&#225;'=>'&aacute;','&#194;'=>'&Acirc;',
439
			'&#160;'=>'&nbsp;','&#161;'=>'&iexcl;','&#162;'=>'&cent;','&#163;'=>'&pound;','&#164;'=>'&curren;',
440
			'&#165;'=>'&yen;','&#166;'=>'&brvbar;','&#167;'=>'&sect;','&#168;'=>'&uml;','&#170;'=>'&ordf;',
441
			'&#171;'=>'&laquo;','&#172;'=>'&not;','&#173;'=>'&shy;','&#174;'=>'&reg;','&#175;'=>'&macr;',
442
			'&#176;'=>'&deg;','&#177;'=>'&plusmn;','&#178;'=>'&sup2;','&#179;'=>'&sup3;','&#180;'=>'&acute;',
443
			'&#181;'=>'&micro;','&#182;'=>'&para;','&#183;'=>'&middot;','&#184;'=>'&cedil;','&#185;'=>'&sup1;',
444
			'&#186;'=>'&ordm;','&#187;'=>'&raquo;','&#188;'=>'&frac14;','&#189;'=>'&frac12;','&#190;'=>'&frac34;',
445
			'&#191;'=>'&iquest;','&#247;'=>'&divide;','&#8709;'=>'&empty;','&#8364;'=>'&euro;',
446
			'&#226;'=>'&acirc;','&#198;'=>'&AElig;','&#230;'=>'&aelig;','&#192;'=>'&Agrave;','&#224;'=>'&agrave;',
447
			'&#197;'=>'&Aring;','&#229;'=>'&aring;','&#195;'=>'&Atilde;','&#227;'=>'&atilde;','&#196;'=>'&Auml;',
448
			'&#228;'=>'&auml;','&#199;'=>'&Ccedil;','&#231;'=>'&ccedil;','&#201;'=>'&Eacute;','&#233;'=>'&eacute;',
449
			'&#202;'=>'&Ecirc;','&#234;'=>'&ecirc;','&#200;'=>'&Egrave;','&#232;'=>'&egrave;','&#203;'=>'&Euml;',
450
			'&#235;'=>'&euml;','&#205;'=>'&Iacute;','&#237;'=>'&iacute;','&#206;'=>'&Icirc;','&#238;'=>'&icirc;',
451
			'&#204;'=>'&Igrave;','&#236;'=>'&igrave;','&#207;'=>'&Iuml;','&#239;'=>'&iuml;','&#209;'=>'&Ntilde;',
452
			'&#241;'=>'&ntilde;','&#211;'=>'&Oacute;','&#243;'=>'&oacute;','&#212;'=>'&Ocirc;','&#244;'=>'&ocirc;',
453
			'&#338;'=>'&OElig;','&#339;'=>'&oelig;','&#210;'=>'&Ograve;','&#242;'=>'&ograve;','&#213;'=>'&Otilde;',
454
			'&#245;'=>'&otilde;','&#214;'=>'&Ouml;','&#246;'=>'&ouml;','&#352;'=>'&Scaron;','&#353;'=>'&scaron;',
455
			'&#223;'=>'&szlig;','&#218;'=>'&Uacute;','&#250;'=>'&uacute;','&#219;'=>'&Ucirc;','&#251;'=>'&ucirc;',
456
			'&#217;'=>'&Ugrave;','&#249;'=>'&ugrave;','&#220;'=>'&Uuml;','&#252;'=>'&uuml;','&#221;'=>'&Yacute;',
457
			'&#253;'=>'&yacute;','&#376;'=>'&Yuml;','&#255;'=>'&yuml;','&#169;'=>'&copy;','&#174;'=>'&reg;',
458
			'&#208;'=>'&ETH;','&#215;'=>'&times;','&#216;'=>'&Oslash;','&#222;'=>'&THORN;','&#240;'=>'&eth;',
459
			'&#248;'=>'&oslash;','&#254;'=>'&thorn;');
460
		if ($charset_in == 'HTML-ENTITIES') {
461
			$string = strtr($string, $named_to_numbered_entities);
462
			$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
463
		}
464
		elseif ($charset_out == 'HTML-ENTITIES') {
465
			$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
466
			$char = "";
467
			while (strlen($string) > 0) {
468
				preg_match("/^(.)(.*)$/su", $string, $match);
469
				if (strlen($match[1]) > 1) {
470
					$char .= "&#".uniord($match[1]).";";
471
				} else $char .= $match[1];
472
				$string = $match[2];
473
			}
474
			$string = $char;
475
			$string_htmlspecialchars_decode=array("&lt;"=>"<", "&gt;"=>">", "&amp;"=>"&", "&quot;"=>"\"", "&#039;"=>"\'");
476
			$string = strtr($string, $string_htmlspecialchars_decode);
477
			$string = strtr($string, $numbered_to_named_entities);
478
		}
479
		return $string;
480
	}
481

  
482
	// mb_convert_encoding() and iconv() aren't available, so use my_mysql_iconv()
483
	if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; }
484
	elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; }
485
	elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; }
486
	elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; }
487
	elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; }
488
	elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = ''; } //?
489
	elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; }
490
	elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; }
491
	elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; }
492
	elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; }
493
	elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; }
494
	elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = ''; } //?
495
	elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = ''; } //?
496
	elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; }
497
	elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; }
498
	elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; }
499
	else { $mysqlcharset_from = 'latin1'; }
500

  
501
	if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; }
502
	elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; }
503
	elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; }
504
	elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; }
505
	elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards
506
	elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = ''; } //?
507
	elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; }
508
	elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; }
509
	elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; }
510
	elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; }
511
	elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; }
512
	elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = ''; } //?
513
	elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = ''; } //?
514
	elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; }
515
	elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; }
516
	elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; }
517
	else { $mysqlcharset_to = 'latin1'; }
518

  
519
	if ($mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) {
520
		$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to);
521
		if ($mysqlcharset_to == 'cp1251') { 
522
			$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" );
523
		}
524
		return($string);
525
	}
526

  
527
	// $string is unchanged. This will happen if we have to deal with ISO-8859-6 or ISO-2022-JP or -KR
528
	// and mbstring _and_ iconv aren't available.
529
	return $string;
471 530
}
472 531
// support-function for mb_convert_encoding_wrapper()
473 532
function uniord($c) {
......
500 559
	$charset = strtoupper($charset);
501 560
	if ($charset == '') { $charset = 'ISO-8859-1'; }
502 561

  
503
	// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions
504
	if ($charset == "GB2312") {
505
		$string=my_mysql_iconv($string, 'gb2312', 'utf8');
506
	} elseif ($charset == "ISO-8859-11") {
507
		$string=my_mysql_iconv($string, 'tis620', 'utf8');
508
	} elseif ($charset != "UTF-8") {
562
	if (!is_UTF8($string)) {
509 563
		$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset);
564
	} else {
510 565
	}
511
	$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
512
	$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
566

  
567
	// check if we really get UTF-8. We don't get UTF-8 if charset is ISO-8859-11 or GB2312 and mb_string AND iconv aren't available.
568
	if (is_UTF8($string)) {
569
		$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
570
		$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
571
	} else {
572
	}
513 573
	return($string);
514 574
}
515 575

  
576
// function to check if a string is UTF-8
577
function is_UTF8 ($string) {
578
	return preg_match('%^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$%xs', $string);
579
}
580

  
516 581
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts
517 582
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
518 583
	$charset_out = strtoupper($charset_out);
519
	if ($charset_out == '') {
520
		$charset_out = 'ISO-8859-1';
521
	}
584
	if ($charset_out == '') { $charset_out = 'ISO-8859-1'; }
522 585
	$string = string_to_utf8($string);
523
	if($charset_out != 'UTF-8') {
524
		if ($charset_out == "GB2312") {
525
			$string=my_mysql_iconv($string, 'utf8', 'gb2312');
526
		} elseif ($charset_out == "ISO-8859-11") {
527
			$string=my_mysql_iconv($string, 'utf8', 'tis620');
528
		} else {
529
			$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
530
		}
531
	}
532 586
	if($convert_htmlspecialchars == 1) {
533 587
		$string=htmlspecialchars($string);
534 588
	}
589
	if($charset_out!='UTF-8' && is_UTF8($string)) {
590
		$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
591
	}
535 592
	return($string);
536 593
}
537 594

  
538 595
// Function to convert a string from mixed html-entitites/$charset_in-umlauts to pure html-entities
539 596
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=1) {
540 597
	$charset_in = strtoupper($charset_in);
541
	if ($charset_in == "") {
542
		$charset_in = 'ISO-8859-1';
543
	}
598
	if ($charset_in == "") { $charset_in = 'ISO-8859-1'; }
544 599
	$string = string_to_utf8($string, $charset_in);
545 600
	if($convert_htmlspecialchars == 1) {
546 601
		$string=htmlspecialchars($string,ENT_QUOTES);
547 602
	}
548
	$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8');
603
	if (is_UTF8($string)) {
604
		$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8');
605
	}
549 606
	return($string);
550 607
}
551 608

  
552 609
// translate any latin/greek/cyrillic html-entities to their plain 7bit equivalents
610
// and numbered-entities into hex
553 611
function entities_to_7bit($string) {
554 612
	require(WB_PATH.'/framework/convert.php');
555 613
	$string = strtr($string, $conversion_array);
614
	$string = preg_replace('/&#([0-9]+);/e', "dechex('$1')",  $string);
556 615
	return($string);
557 616
}
558 617

  

Also available in: Unified diff