Project

General

Profile

« Previous | Next » 

Revision 455

Added by Matthias about 17 years ago

Added a wrapper to functions.php for use when the mbstring extension is not available

View differences:

functions.php
341 341
// Function as replacement for php's htmlspecialchars()
342 342
function my_htmlspecialchars($string) {
343 343
	$string = umlauts_to_entities($string);
344
	$string = entities_to_umlauts($string);
344
	$string = entities_to_umlauts($string, DEFAULT_CHARSET, 1);
345 345
	return($string);
346 346
}
347 347

  
......
363 363
	return $converted_string;
364 364
}
365 365

  
366
// Function as wrapper for mb_convert_encoding
//
// Converts $string from $charset_in to $charset_out.
//  - If the mbstring extension is loaded, the call is passed straight through
//    to mb_convert_encoding().
//  - Otherwise both charset names are mapped to MySQL charset names and the
//    conversion is delegated to my_mysql_iconv(). The pseudo-charset
//    'HTML-ENTITIES' is handled in PHP and only in combination with 'UTF-8'.
// In the fallback, charset names are matched case-insensitively and unknown
// names are treated as latin1. Combinations the fallback cannot handle return
// the input unchanged.
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
	if (function_exists('mb_convert_encoding')) {
		return mb_convert_encoding($string, $charset_out, $charset_in);
	}

	// mb_convert_encoding() accepts charset names in any case, so the fallback does too
	$charset_in = strtoupper($charset_in);
	$charset_out = strtoupper($charset_out);
	if ($charset_in === $charset_out) {
		// nothing to convert (also keeps ISO-8859-5 input from being left in cp1251)
		return $string;
	}

	$mysqlcharset_from = mbw_mysql_charset($charset_in);
	$mysqlcharset_to = mbw_mysql_charset($charset_out);

	// use mysql to convert the string
	if ($mysqlcharset_from != 'html' && $mysqlcharset_to != 'html') {
		if ($mysqlcharset_from != $mysqlcharset_to) {
			// ISO-8859-5 is routed through cp1251 on both sides of the mysql conversion.
			// NOTE(review): convert_cyr_string() was removed in PHP 8.0; this path
			// needs a replacement before running on PHP 8.
			if ($charset_in === 'ISO-8859-5') {
				$string = convert_cyr_string($string, "iso8859-5", "windows-1251");
			}
			$string = my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to);
			if ($mysqlcharset_to == 'cp1251') {
				$string = convert_cyr_string($string, "windows-1251", "iso-8859-5");
			}
		}
		return $string;
	}

	// do the utf8->htmlentities or htmlentities->utf8 translation
	if ($mysqlcharset_from == 'html' && $mysqlcharset_to == 'utf8') {
		// named entities -> numeric entities -> utf-8 bytes
		$string = strtr($string, mbw_named_to_numbered_entities());
		$string = mbw_decode_numeric_entities($string);
	} elseif ($mysqlcharset_from == 'utf8' && $mysqlcharset_to == 'html') {
		// normalise: decode numeric entities that are already present ...
		$string = mbw_decode_numeric_entities($string);
		// ... then turn every non-ASCII character into a numeric entity.
		// preg_replace_callback() returns null if $string is not valid UTF-8;
		// in that case the string is left as it is instead of being emptied.
		$encoded = preg_replace_callback('/[^\x00-\x7F]/u', 'mbw_char_to_numeric_entity', $string);
		if ($encoded !== null) {
			$string = $encoded;
		}
		$string_htmlspecialchars_decode = array("&lt;"=>"<", "&gt;"=>">", "&amp;"=>"&", "&quot;"=>"\"", "&#039;"=>"'");
		$string = strtr($string, $string_htmlspecialchars_decode);
		// numeric entities -> named entities where a name is known
		$string = strtr($string, array_flip(mbw_named_to_numbered_entities()));
	}
	return $string;
}

// support-function for mb_convert_encoding_wrapper()
// Maps an upper-case charset name to the MySQL charset name used for the
// conversion. 'HTML-ENTITIES' maps to the marker 'html'; unknown names map to 'latin1'.
function mbw_mysql_charset($charset) {
	static $map = array(
		'ISO-8859-1' => 'latin1',
		'ISO-8859-2' => 'latin2',
		'ISO-8859-3' => 'latin1', //?
		'ISO-8859-4' => 'latin7',
		'ISO-8859-5' => 'cp1251', // combined with convert_cyr_string()
		'ISO-8859-6' => 'latin1', //? BROKEN
		'ISO-8859-7' => 'greek',
		'ISO-8859-8' => 'hebrew',
		'ISO-8859-9' => 'latin5',
		'ISO-8859-10' => 'latin1', //?
		'BIG5' => 'big5',
		'ISO-2022-JP' => 'latin1', //? BROKEN
		'ISO-2022-KR' => 'latin1', //? BROKEN
		'GB2312' => 'gb2312',
		'ISO-8859-11' => 'tis620',
		'UTF-8' => 'utf8',
		'HTML-ENTITIES' => 'html', // special-case
	);
	return (is_string($charset) && isset($map[$charset])) ? $map[$charset] : 'latin1';
}

// support-function for mb_convert_encoding_wrapper()
// Returns the table of supported named entities => numeric entities.
function mbw_named_to_numbered_entities() {
	static $table = array('&Aacute;'=>'&#193;','&aacute;'=>'&#225;','&Acirc;'=>'&#194;',
	'&acirc;'=>'&#226;','&AElig;'=>'&#198;','&aelig;'=>'&#230;','&Agrave;'=>'&#192;','&agrave;'=>'&#224;',
	'&Aring;'=>'&#197;','&aring;'=>'&#229;','&Atilde;'=>'&#195;','&atilde;'=>'&#227;','&Auml;'=>'&#196;',
	'&auml;'=>'&#228;','&Ccedil;'=>'&#199;','&ccedil;'=>'&#231;','&Eacute;'=>'&#201;','&eacute;'=>'&#233;',
	'&Ecirc;'=>'&#202;','&ecirc;'=>'&#234;','&Egrave;'=>'&#200;','&egrave;'=>'&#232;','&Euml;'=>'&#203;',
	'&euml;'=>'&#235;','&Iacute;'=>'&#205;','&iacute;'=>'&#237;','&Icirc;'=>'&#206;','&icirc;'=>'&#238;',
	'&Igrave;'=>'&#204;','&igrave;'=>'&#236;','&Iuml;'=>'&#207;','&iuml;'=>'&#239;','&Ntilde;'=>'&#209;',
	'&ntilde;'=>'&#241;','&Oacute;'=>'&#211;','&oacute;'=>'&#243;','&Ocirc;'=>'&#212;','&ocirc;'=>'&#244;',
	'&OElig;'=>'&#338;','&oelig;'=>'&#339;','&Ograve;'=>'&#210;','&ograve;'=>'&#242;','&Otilde;'=>'&#213;',
	'&otilde;'=>'&#245;','&Ouml;'=>'&#214;','&ouml;'=>'&#246;','&Scaron;'=>'&#352;','&scaron;'=>'&#353;',
	'&szlig;'=>'&#223;','&Uacute;'=>'&#218;','&uacute;'=>'&#250;','&Ucirc;'=>'&#219;','&ucirc;'=>'&#251;',
	'&Ugrave;'=>'&#217;','&ugrave;'=>'&#249;','&Uuml;'=>'&#220;','&uuml;'=>'&#252;','&Yacute;'=>'&#221;',
	'&yacute;'=>'&#253;','&Yuml;'=>'&#376;','&yuml;'=>'&#255;','&copy;'=>'&#169;','&reg;'=>'&#174;',
	'&ETH;'=>'&#208;','&times;'=>'&#215;','&Oslash;'=>'&#216;','&THORN;'=>'&#222;','&eth;'=>'&#240;',
	'&oslash;'=>'&#248;','&thorn;'=>'&#254;');
	return $table;
}

// support-function for mb_convert_encoding_wrapper()
// Replaces every decimal numeric entity (&#NNN;) in $string by its utf-8 bytes.
function mbw_decode_numeric_entities($string) {
	return preg_replace_callback('/&#([0-9]+);/', 'mbw_numeric_entity_to_utf8', $string);
}

// support-function for mb_convert_encoding_wrapper()
// preg callback: the int cast keeps leading zeros (&#039;) from being read as octal.
function mbw_numeric_entity_to_utf8($match) {
	return code_to_utf8((int) $match[1]);
}

// support-function for mb_convert_encoding_wrapper()
// preg callback: turns one multi-byte utf-8 character into a numeric entity.
function mbw_char_to_numeric_entity($match) {
	return "&#".uniord($match[0]).";";
}
472
// support-function for mb_convert_encoding_wrapper()
//
// Returns the code point number of the first utf-8 encoded character in $c.
//  - an empty string, or a first byte in the range 128-191, yields 0
//  - a first byte of 254 or 255, or a sequence that is shorter than its first
//    byte announces, yields false (error)
// The 5- and 6-byte forms are still decoded for backward compatibility.
function uniord($c) {
	$c = (string) $c;
	if ($c === '') {
		return 0;
	}

	// square brackets: the $c{0} offset syntax is a parse error in PHP 8
	$lead = ord($c[0]);
	if ($lead <= 127) {
		return $lead;
	}
	if ($lead >= 254) {
		return false; // error
	}

	// sequence length and payload bits carried by the first byte
	if ($lead >= 252) {
		$length = 6;
		$ud = $lead - 252;
	} elseif ($lead >= 248) {
		$length = 5;
		$ud = $lead - 248;
	} elseif ($lead >= 240) {
		$length = 4;
		$ud = $lead - 240;
	} elseif ($lead >= 224) {
		$length = 3;
		$ud = $lead - 224;
	} elseif ($lead >= 192) {
		$length = 2;
		$ud = $lead - 192;
	} else {
		// 128-191 cannot start a character
		return 0;
	}

	if (strlen($c) < $length) {
		return false; // truncated sequence
	}

	// every following byte contributes 6 bits
	for ($i = 1; $i < $length; $i++) {
		$ud = $ud * 64 + (ord($c[$i]) - 128);
	}
	return $ud;
}
484
// support-function for mb_convert_encoding_wrapper()
//
// Returns the utf-8 byte sequence for the code point number $num (integer or
// numeric string). Values that cannot be encoded as valid utf-8 (non-numeric,
// negative, the surrogate range 0xD800-0xDFFF, or above 0x10FFFF) yield a
// single space, the same placeholder already used for out-of-range input.
function code_to_utf8($num) {
	// range check before the cast so that oversized values cannot wrap around
	if (!is_numeric($num) || $num < 0 || $num > 0x10FFFF) {
		return " ";
	}
	$num = (int) $num;
	if ($num >= 0xD800 && $num <= 0xDFFF) {
		// surrogates are not characters; encoding them would produce invalid utf-8
		return " ";
	}

	if ($num <= 0x7F) {
		return chr($num);
	}
	if ($num <= 0x7FF) {
		return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
	}
	if ($num <= 0xFFFF) {
		return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
	}
	return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
}
497

  
366 498
// Function to convert a string from mixed html-entities/umlauts to pure utf-8-umlauts
367 499
function string_to_utf8($string, $charset=DEFAULT_CHARSET) {
368 500
	$charset = strtoupper($charset);
......
373 505
		$string=my_mysql_iconv($string, 'gb2312', 'utf8');
374 506
	} elseif ($charset == "ISO-8859-11") {
375 507
		$string=my_mysql_iconv($string, 'tis620', 'utf8');
376
	} else {
377
		$string=mb_convert_encoding($string, 'UTF-8', $charset);
508
	} elseif ($charset != "UTF-8") {
509
		$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset);
378 510
	}
379
	$string=mb_convert_encoding($string, 'HTML-ENTITIES', 'UTF-8');
380
	$string=mb_convert_encoding($string, 'UTF-8', 'HTML-ENTITIES');
511
	$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
512
	$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
381 513
	return($string);
382 514
}
383 515

  
......
394 526
		} elseif ($charset_out == "ISO-8859-11") {
395 527
			$string=my_mysql_iconv($string, 'utf8', 'tis620');
396 528
		} else {
397
			$string=mb_convert_encoding($string, $charset_out, 'UTF-8');
529
			$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
398 530
		}
399 531
	}
400 532
	if($convert_htmlspecialchars == 1) {
......
413 545
	if($convert_htmlspecialchars == 1) {
414 546
		$string=htmlspecialchars($string,ENT_QUOTES);
415 547
	}
416
	$string=mb_convert_encoding($string,'HTML-ENTITIES','UTF-8');
548
	$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8');
417 549
	return($string);
418 550
}
419 551

  

Also available in: Unified diff