Revision 455
Added by Matthias over 17 years ago
functions.php | ||
---|---|---|
341 | 341 |
// Function as replacement for php's htmlspecialchars() |
342 | 342 |
function my_htmlspecialchars($string) { |
343 | 343 |
$string = umlauts_to_entities($string); |
344 |
$string = entities_to_umlauts($string); |
|
344 |
$string = entities_to_umlauts($string, DEFAULT_CHARSET, 1);
|
|
345 | 345 |
return($string); |
346 | 346 |
} |
347 | 347 |
|
... | ... | |
363 | 363 |
return $converted_string; |
364 | 364 |
} |
365 | 365 |
|
366 |
// Function as wrapper for mb_convert_encoding
//
// Converts $string from $charset_in to $charset_out.
// - If the mbstring extension is loaded, the call is passed straight through
//   to mb_convert_encoding().
// - Otherwise charset-to-charset conversion is delegated to my_mysql_iconv()
//   (using MySQL charset names), and the UTF-8 <-> HTML-ENTITIES translation
//   is done by hand with the help of code_to_utf8() and uniord().
//
// $string      text to convert
// $charset_out target charset, e.g. 'UTF-8', 'ISO-8859-1', 'HTML-ENTITIES'
// $charset_in  charset $string is currently encoded in
// returns      the converted text
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
	if (function_exists('mb_convert_encoding')) {
		return mb_convert_encoding($string, $charset_out, $charset_in);
	}

	// Nothing to convert. Returning here also keeps an ISO-8859-5 string from
	// being pushed through the windows-1251 detour below without ever being
	// converted back.
	if ((string)$charset_in === (string)$charset_out) {
		return $string;
	}

	$mysqlcharset_from = _mbcew_mysql_charset($charset_in);
	$mysqlcharset_to = _mbcew_mysql_charset($charset_out);

	// ISO-8859-5 is routed through windows-1251 (MySQL 'cp1251').
	// NOTE(review): convert_cyr_string() was removed in PHP 8.0, so this
	// fallback path still needs a replacement for Cyrillic on PHP 8.
	if ($charset_in == 'ISO-8859-5') {
		$string = convert_cyr_string($string, "iso8859-5", "windows-1251");
	}

	// use mysql to convert the string
	if ($mysqlcharset_from != 'html' && $mysqlcharset_to != 'html' && $mysqlcharset_from != $mysqlcharset_to) {
		$string = my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to);
		if ($mysqlcharset_to == 'cp1251') {
			$string = convert_cyr_string($string, "windows-1251", "iso-8859-5");
		}
	}

	// do the utf8->htmlentities or htmlentities->utf8 translation
	if ($mysqlcharset_from == 'html' && $mysqlcharset_to == 'utf8') {
		// named entities -> numbered entities -> UTF-8 bytes
		$string = strtr($string, _mbcew_entity_table());
		$string = _mbcew_decode_numeric_entities($string);
	} elseif ($mysqlcharset_from == 'utf8' && $mysqlcharset_to == 'html') {
		// normalise first: existing numbered entities become UTF-8 bytes
		$string = _mbcew_decode_numeric_entities($string);
		// every non-ASCII character becomes a numbered entity; on invalid
		// UTF-8 preg_replace_callback() yields NULL and the text is kept as is
		$encoded = preg_replace_callback('/[^\x00-\x7F]/u', '_mbcew_encode_char', $string);
		if ($encoded !== null) {
			$string = $encoded;
		}
		// NOTE(review): the keys of this table were reconstructed from a
		// rendering that had decoded the entities - confirm against the
		// repository, especially the apostrophe entry. It is presumably dead
		// anyway, because numbered entities were already decoded above.
		$string_htmlspecialchars_decode = array("&lt;"=>"<", "&gt;"=>">", "&amp;"=>"&", "&quot;"=>"\"", "&#039;'"=>"\'");
		$string = strtr($string, $string_htmlspecialchars_decode);
		// numbered entities -> named entities where a name is known
		$string = strtr($string, array_flip(_mbcew_entity_table()));
	}
	return($string);
}

// support-function for mb_convert_encoding_wrapper()
// Maps a charset name to the MySQL charset name handed to my_mysql_iconv();
// 'html' marks the HTML-ENTITIES special case, unknown names map to 'latin1'.
function _mbcew_mysql_charset($charset) {
	$mysql_charsets = array(
		'ISO-8859-1'    => 'latin1',
		'ISO-8859-2'    => 'latin2',
		'ISO-8859-3'    => 'latin1', //?
		'ISO-8859-4'    => 'latin7',
		'ISO-8859-5'    => 'cp1251', // combined with convert_cyr_string()
		'ISO-8859-6'    => 'latin1', //? BROKEN
		'ISO-8859-7'    => 'greek',
		'ISO-8859-8'    => 'hebrew',
		'ISO-8859-9'    => 'latin5',
		'ISO-8859-10'   => 'latin1', //?
		'BIG5'          => 'big5',
		'ISO-2022-JP'   => 'latin1', //? BROKEN
		'ISO-2022-KR'   => 'latin1', //? BROKEN
		'GB2312'        => 'gb2312',
		'ISO-8859-11'   => 'tis620',
		'UTF-8'         => 'utf8',
		'HTML-ENTITIES' => 'html' // special-case
	);
	$charset = (string)$charset;
	return isset($mysql_charsets[$charset]) ? $mysql_charsets[$charset] : 'latin1';
}

// support-function for mb_convert_encoding_wrapper()
// Returns the named-entity => numbered-entity table; array_flip() gives the
// reverse direction.
// NOTE(review): the literals were reconstructed from a rendering that had
// decoded the entities - verify against the repository.
function _mbcew_entity_table() {
	return array(
		'&Aacute;'=>'&#193;','&aacute;'=>'&#225;','&Acirc;'=>'&#194;',
		'&acirc;'=>'&#226;','&AElig;'=>'&#198;','&aelig;'=>'&#230;','&Agrave;'=>'&#192;','&agrave;'=>'&#224;',
		'&Aring;'=>'&#197;','&aring;'=>'&#229;','&Atilde;'=>'&#195;','&atilde;'=>'&#227;','&Auml;'=>'&#196;',
		'&auml;'=>'&#228;','&Ccedil;'=>'&#199;','&ccedil;'=>'&#231;','&Eacute;'=>'&#201;','&eacute;'=>'&#233;',
		'&Ecirc;'=>'&#202;','&ecirc;'=>'&#234;','&Egrave;'=>'&#200;','&egrave;'=>'&#232;','&Euml;'=>'&#203;',
		'&euml;'=>'&#235;','&Iacute;'=>'&#205;','&iacute;'=>'&#237;','&Icirc;'=>'&#206;','&icirc;'=>'&#238;',
		'&Igrave;'=>'&#204;','&igrave;'=>'&#236;','&Iuml;'=>'&#207;','&iuml;'=>'&#239;','&Ntilde;'=>'&#209;',
		'&ntilde;'=>'&#241;','&Oacute;'=>'&#211;','&oacute;'=>'&#243;','&Ocirc;'=>'&#212;','&ocirc;'=>'&#244;',
		'&OElig;'=>'&#338;','&oelig;'=>'&#339;','&Ograve;'=>'&#210;','&ograve;'=>'&#242;','&Otilde;'=>'&#213;',
		'&otilde;'=>'&#245;','&Ouml;'=>'&#214;','&ouml;'=>'&#246;','&Scaron;'=>'&#352;','&scaron;'=>'&#353;',
		'&szlig;'=>'&#223;','&Uacute;'=>'&#218;','&uacute;'=>'&#250;','&Ucirc;'=>'&#219;','&ucirc;'=>'&#251;',
		'&Ugrave;'=>'&#217;','&ugrave;'=>'&#249;','&Uuml;'=>'&#220;','&uuml;'=>'&#252;','&Yacute;'=>'&#221;',
		'&yacute;'=>'&#253;','&Yuml;'=>'&#376;','&yuml;'=>'&#255;','&copy;'=>'&#169;','&reg;'=>'&#174;',
		'&ETH;'=>'&#208;','&times;'=>'&#215;','&Oslash;'=>'&#216;','&THORN;'=>'&#222;','&eth;'=>'&#240;',
		'&oslash;'=>'&#248;','&thorn;'=>'&#254;'
	);
}

// support-function for mb_convert_encoding_wrapper()
// Replaces every decimal entity (&#NNN;) in $string by its UTF-8 bytes.
// Uses a callback instead of the /e modifier, which was removed in PHP 7.
function _mbcew_decode_numeric_entities($string) {
	return preg_replace_callback('/&#([0-9]+);/', '_mbcew_numeric_entity_cb', $string);
}

// preg callback: the digits are cast to int so that zero-padded entities such
// as &#0228; are read as decimal numbers.
function _mbcew_numeric_entity_cb($match) {
	return code_to_utf8((int)$match[1]);
}

// preg callback: turns one multi-byte UTF-8 character into a numbered entity.
function _mbcew_encode_char($match) {
	return "&#".uniord($match[0]).";";
}
|
472 |
// support-function for mb_convert_encoding_wrapper()
//
// Returns the code point encoded by the UTF-8 sequence at the start of $c.
// Only the lead byte and the continuation bytes it announces are read; the
// continuation bytes themselves are not validated.
//
// returns: the code point as integer,
//          0 for an empty string or a stray continuation byte (128-191),
//          false for the invalid lead bytes 254/255 or a truncated sequence.
function uniord($c) {
	$c = (string)$c;
	if ($c === '') {
		return 0;
	}

	// $c[0] instead of $c{0}: curly-brace string offsets no longer parse on PHP 8
	$lead = ord($c[0]);
	if ($lead <= 127) {
		return $lead; // plain ASCII
	}
	if ($lead <= 191) {
		return 0; // continuation byte without a lead byte
	}
	if ($lead >= 254) {
		return false; // error
	}

	// sequence length announced by the lead byte, and the payload bits it carries
	if ($lead <= 223) {
		$length = 2; $ud = $lead - 192;
	} elseif ($lead <= 239) {
		$length = 3; $ud = $lead - 224;
	} elseif ($lead <= 247) {
		$length = 4; $ud = $lead - 240;
	} elseif ($lead <= 251) {
		$length = 5; $ud = $lead - 248;
	} else {
		$length = 6; $ud = $lead - 252;
	}

	if (strlen($c) < $length) {
		return false; // sequence is cut off
	}

	// every continuation byte contributes six more bits
	for ($i = 1; $i < $length; $i++) {
		$ud = $ud * 64 + (ord($c[$i]) - 128);
	}
	return $ud;
}
|
484 |
// support-function for mb_convert_encoding_wrapper()
//
// Encodes the code point $num as a UTF-8 byte sequence of one to four bytes.
// Values above 0x1FFFFF cannot be represented and yield a single space.
function code_to_utf8($num) {
	// one byte: 0xxxxxxx
	if ($num <= 0x7F) {
		return chr($num);
	}

	// lowest six bits, shared by every multi-byte form
	$tail = chr(0x80 | ($num & 0x3F));

	// two bytes: 110xxxxx 10xxxxxx
	if ($num <= 0x7FF) {
		return chr(0xC0 | ($num >> 6)) . $tail;
	}

	$mid = chr(0x80 | (($num >> 6) & 0x3F));

	// three bytes: 1110xxxx 10xxxxxx 10xxxxxx
	if ($num <= 0xFFFF) {
		return chr(0xE0 | ($num >> 12)) . $mid . $tail;
	}

	// four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if ($num <= 0x1FFFFF) {
		return chr(0xF0 | ($num >> 18)) . chr(0x80 | (($num >> 12) & 0x3F)) . $mid . $tail;
	}

	return " ";
}
|
497 |
|
|
366 | 498 |
// Function to convert a string from mixed html-entities/umlauts to pure utf-8-umlauts |
367 | 499 |
function string_to_utf8($string, $charset=DEFAULT_CHARSET) { |
368 | 500 |
$charset = strtoupper($charset); |
... | ... | |
373 | 505 |
$string=my_mysql_iconv($string, 'gb2312', 'utf8'); |
374 | 506 |
} elseif ($charset == "ISO-8859-11") { |
375 | 507 |
$string=my_mysql_iconv($string, 'tis620', 'utf8'); |
376 |
} else { |
|
377 |
$string=mb_convert_encoding($string, 'UTF-8', $charset); |
|
508 |
} elseif ($charset != "UTF-8") {
|
|
509 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset);
|
|
378 | 510 |
} |
379 |
$string=mb_convert_encoding($string, 'HTML-ENTITIES', 'UTF-8'); |
|
380 |
$string=mb_convert_encoding($string, 'UTF-8', 'HTML-ENTITIES'); |
|
511 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
|
|
512 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
|
|
381 | 513 |
return($string); |
382 | 514 |
} |
383 | 515 |
|
... | ... | |
394 | 526 |
} elseif ($charset_out == "ISO-8859-11") { |
395 | 527 |
$string=my_mysql_iconv($string, 'utf8', 'tis620'); |
396 | 528 |
} else { |
397 |
$string=mb_convert_encoding($string, $charset_out, 'UTF-8'); |
|
529 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
|
|
398 | 530 |
} |
399 | 531 |
} |
400 | 532 |
if($convert_htmlspecialchars == 1) { |
... | ... | |
413 | 545 |
if($convert_htmlspecialchars == 1) { |
414 | 546 |
$string=htmlspecialchars($string,ENT_QUOTES); |
415 | 547 |
} |
416 |
$string=mb_convert_encoding($string,'HTML-ENTITIES','UTF-8'); |
|
548 |
$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8');
|
|
417 | 549 |
return($string); |
418 | 550 |
} |
419 | 551 |
|
Also available in: Unified diff
Added a wrapper for mb_convert_encoding() to functions.php, used as a fallback when the mbstring extension is not available