364 |
364 |
}
|
365 |
365 |
|
366 |
366 |
// Function as wrapper for mb_convert_encoding
|
|
367 |
// converts $charset_in to $charset_out or
|
|
368 |
// UTF-8 to HTML-ENTITIES or HTML-ENTITIES to UTF-8
|
367 |
369 |
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
|
|
370 |
// try mb_convert_encoding(). This can handle to or from HTML-ENTITIES, too
|
368 |
371 |
if (function_exists('mb_convert_encoding')) {
|
369 |
|
$string=mb_convert_encoding($string, $charset_out, $charset_in);
|
370 |
|
} else {
|
371 |
|
if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; }
|
372 |
|
elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; }
|
373 |
|
elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; } //?
|
374 |
|
elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; }
|
375 |
|
elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; }
|
376 |
|
elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = 'latin1'; } //? BROKEN
|
377 |
|
elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; }
|
378 |
|
elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; }
|
379 |
|
elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; }
|
380 |
|
elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; } //?
|
381 |
|
elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; }
|
382 |
|
elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = 'latin1'; } //? BROKEN
|
383 |
|
elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = 'latin1'; } //? BROKEN
|
384 |
|
elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; }
|
385 |
|
elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; }
|
386 |
|
elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; }
|
387 |
|
else { $mysqlcharset_from = 'latin1'; }
|
388 |
|
|
389 |
|
if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; }
|
390 |
|
elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; }
|
391 |
|
elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; } //?
|
392 |
|
elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; }
|
393 |
|
elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards
|
394 |
|
elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = 'latin1'; } //? BROKEN
|
395 |
|
elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; }
|
396 |
|
elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; }
|
397 |
|
elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; }
|
398 |
|
elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; } //?
|
399 |
|
elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; }
|
400 |
|
elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = 'latin1'; } //? BROKEN
|
401 |
|
elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = 'latin1'; } //? BROKEN
|
402 |
|
elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; }
|
403 |
|
elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; }
|
404 |
|
elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; }
|
405 |
|
else { $mysqlcharset_to = 'latin1'; }
|
406 |
|
|
407 |
|
if ($charset_in == 'HTML-ENTITIES') { $mysqlcharset_from = 'html'; } // special-case
|
408 |
|
if ($charset_out == 'HTML-ENTITIES') { $mysqlcharset_to = 'html'; } // special-case
|
409 |
|
|
410 |
|
// use mysql to convert the string
|
411 |
|
if ($mysqlcharset_from!="html" && $mysqlcharset_to!="html" && $mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) {
|
412 |
|
$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to);
|
413 |
|
if ($mysqlcharset_to == 'cp1251') {
|
414 |
|
$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" );
|
|
372 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions
|
|
373 |
if ($charset_in=='ISO-8859-11' || $charset_in=='GB2312') {
|
|
374 |
if (function_exists('iconv')) {
|
|
375 |
$string = iconv($charset_in, 'UTF-8', $string);
|
415 |
376 |
}
|
|
377 |
else {
|
|
378 |
if ($charset_in == 'GB2312') {
|
|
379 |
$string=my_mysql_iconv($string, 'gb2312', 'utf8');
|
|
380 |
} else {
|
|
381 |
$string=my_mysql_iconv($string, 'tis620', 'utf8');
|
|
382 |
}
|
|
383 |
}
|
|
384 |
$charset_in='UTF-8';
|
|
385 |
if ($charset_out == 'UTF-8') {
|
|
386 |
return $string;
|
|
387 |
}
|
416 |
388 |
}
|
417 |
|
// do the utf8->htmlentities or htmlentities->utf8 translation
|
418 |
|
if (($mysqlcharset_from=='html' && $mysqlcharset_to=='utf8') || ($mysqlcharset_from=='utf8' && $mysqlcharset_to=='html')) {
|
419 |
|
if ($mysqlcharset_from == 'html') {
|
420 |
|
$named_to_numbered_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â',
|
421 |
|
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à',
|
422 |
|
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä',
|
423 |
|
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é',
|
424 |
|
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë',
|
425 |
|
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î',
|
426 |
|
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ',
|
427 |
|
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô',
|
428 |
|
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ',
|
429 |
|
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š',
|
430 |
|
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û',
|
431 |
|
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý',
|
432 |
|
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®',
|
433 |
|
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð',
|
434 |
|
'ø'=>'ø','þ'=>'þ');
|
435 |
|
$string = strtr($string, $named_to_numbered_entities);
|
436 |
|
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
389 |
if ($charset_out=='ISO-8859-11' || $charset_out=='GB2312') {
|
|
390 |
$string=mb_convert_encoding($string, 'UTF-8', $charset_in);
|
|
391 |
if (function_exists('iconv')) {
|
|
392 |
$string = iconv('UTF-8', $charset_out, $string);
|
437 |
393 |
}
|
438 |
|
elseif ($mysqlcharset_to == 'html') {
|
439 |
|
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
440 |
|
$char = "";
|
441 |
|
while (strlen($string) > 0) {
|
442 |
|
preg_match("/^(.)(.*)$/su", $string, $match);
|
443 |
|
if (strlen($match[1]) > 1) {
|
444 |
|
$char .= "&#".uniord($match[1]).";";
|
445 |
|
} else $char .= $match[1];
|
446 |
|
$string = $match[2];
|
|
394 |
else {
|
|
395 |
if ($charset_out == 'GB2312') {
|
|
396 |
$string=my_mysql_iconv($string, 'utf8', 'gb2312');
|
|
397 |
} else {
|
|
398 |
$string=my_mysql_iconv($string, 'utf8', 'tis620');
|
447 |
399 |
}
|
448 |
|
$string = $char;
|
449 |
|
$string_htmlspecialchars_decode=array("<"=>"<", ">"=>">", "&"=>"&", """=>"\"", "''"=>"\'");
|
450 |
|
$string = strtr($string, $string_htmlspecialchars_decode);
|
451 |
|
$numbered_to_named_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â',
|
452 |
|
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à',
|
453 |
|
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä',
|
454 |
|
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é',
|
455 |
|
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë',
|
456 |
|
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î',
|
457 |
|
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ',
|
458 |
|
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô',
|
459 |
|
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ',
|
460 |
|
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š',
|
461 |
|
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û',
|
462 |
|
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý',
|
463 |
|
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®',
|
464 |
|
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð',
|
465 |
|
'ø'=>'ø','þ'=>'þ');
|
466 |
|
$string = strtr($string, $numbered_to_named_entities);
|
467 |
400 |
}
|
|
401 |
} else {
|
|
402 |
$string=mb_convert_encoding($string, $charset_out, $charset_in);
|
468 |
403 |
}
|
|
404 |
return $string;
|
469 |
405 |
}
|
470 |
|
return($string);
|
|
406 |
|
|
407 |
// try iconv(). This can't handle to or from HTML-ENTITIES.
|
|
408 |
if (function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) {
|
|
409 |
$string = iconv($charset_in, $charset_out, $string);
|
|
410 |
return $string;
|
|
411 |
}
|
|
412 |
|
|
413 |
// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation
|
|
414 |
if (($charset_in=='HTML-ENTITIES' && $charset_out=='UTF-8') || ($charset_in=='UTF-8' && $charset_out=='HTML-ENTITIES')) {
|
|
415 |
$named_to_numbered_entities=array(
|
|
416 |
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤',
|
|
417 |
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª',
|
|
418 |
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯',
|
|
419 |
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´',
|
|
420 |
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹',
|
|
421 |
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾',
|
|
422 |
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€',
|
|
423 |
'Á'=>'Á','á'=>'á','Â'=>'Â',
|
|
424 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à',
|
|
425 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä',
|
|
426 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é',
|
|
427 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë',
|
|
428 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î',
|
|
429 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ',
|
|
430 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô',
|
|
431 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ',
|
|
432 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š',
|
|
433 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û',
|
|
434 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý',
|
|
435 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®',
|
|
436 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð',
|
|
437 |
'ø'=>'ø','þ'=>'þ');
|
|
438 |
$numbered_to_named_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â',
|
|
439 |
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤',
|
|
440 |
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª',
|
|
441 |
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯',
|
|
442 |
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´',
|
|
443 |
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹',
|
|
444 |
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾',
|
|
445 |
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€',
|
|
446 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à',
|
|
447 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä',
|
|
448 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é',
|
|
449 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë',
|
|
450 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î',
|
|
451 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ',
|
|
452 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô',
|
|
453 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ',
|
|
454 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š',
|
|
455 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û',
|
|
456 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý',
|
|
457 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®',
|
|
458 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð',
|
|
459 |
'ø'=>'ø','þ'=>'þ');
|
|
460 |
if ($charset_in == 'HTML-ENTITIES') {
|
|
461 |
$string = strtr($string, $named_to_numbered_entities);
|
|
462 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
463 |
}
|
|
464 |
elseif ($charset_out == 'HTML-ENTITIES') {
|
|
465 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
466 |
$char = "";
|
|
467 |
while (strlen($string) > 0) {
|
|
468 |
preg_match("/^(.)(.*)$/su", $string, $match);
|
|
469 |
if (strlen($match[1]) > 1) {
|
|
470 |
$char .= "&#".uniord($match[1]).";";
|
|
471 |
} else $char .= $match[1];
|
|
472 |
$string = $match[2];
|
|
473 |
}
|
|
474 |
$string = $char;
|
|
475 |
$string_htmlspecialchars_decode=array("<"=>"<", ">"=>">", "&"=>"&", """=>"\"", "'"=>"\'");
|
|
476 |
$string = strtr($string, $string_htmlspecialchars_decode);
|
|
477 |
$string = strtr($string, $numbered_to_named_entities);
|
|
478 |
}
|
|
479 |
return $string;
|
|
480 |
}
|
|
481 |
|
|
482 |
// mb_convert_encoding() and iconv() aren't available, so use my_mysql_iconv()
|
|
483 |
if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; }
|
|
484 |
elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; }
|
|
485 |
elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; }
|
|
486 |
elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; }
|
|
487 |
elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; }
|
|
488 |
elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = ''; } //?
|
|
489 |
elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; }
|
|
490 |
elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; }
|
|
491 |
elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; }
|
|
492 |
elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; }
|
|
493 |
elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; }
|
|
494 |
elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = ''; } //?
|
|
495 |
elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = ''; } //?
|
|
496 |
elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; }
|
|
497 |
elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; }
|
|
498 |
elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; }
|
|
499 |
else { $mysqlcharset_from = 'latin1'; }
|
|
500 |
|
|
501 |
if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; }
|
|
502 |
elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; }
|
|
503 |
elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; }
|
|
504 |
elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; }
|
|
505 |
elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards
|
|
506 |
elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = ''; } //?
|
|
507 |
elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; }
|
|
508 |
elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; }
|
|
509 |
elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; }
|
|
510 |
elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; }
|
|
511 |
elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; }
|
|
512 |
elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = ''; } //?
|
|
513 |
elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = ''; } //?
|
|
514 |
elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; }
|
|
515 |
elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; }
|
|
516 |
elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; }
|
|
517 |
else { $mysqlcharset_to = 'latin1'; }
|
|
518 |
|
|
519 |
if ($mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) {
|
|
520 |
$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to);
|
|
521 |
if ($mysqlcharset_to == 'cp1251') {
|
|
522 |
$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" );
|
|
523 |
}
|
|
524 |
return($string);
|
|
525 |
}
|
|
526 |
|
|
527 |
// $string is unchanged. This will happen if we have to deal with ISO-8859-6 or ISO-2022-JP or -KR
|
|
528 |
// and mbstring _and_ iconv aren't available.
|
|
529 |
return $string;
|
471 |
530 |
}
|
472 |
531 |
// support-function for mb_convert_encoding_wrapper()
|
473 |
532 |
function uniord($c) {
|
... | ... | |
500 |
559 |
$charset = strtoupper($charset);
|
501 |
560 |
if ($charset == '') { $charset = 'ISO-8859-1'; }
|
502 |
561 |
|
503 |
|
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions
|
504 |
|
if ($charset == "GB2312") {
|
505 |
|
$string=my_mysql_iconv($string, 'gb2312', 'utf8');
|
506 |
|
} elseif ($charset == "ISO-8859-11") {
|
507 |
|
$string=my_mysql_iconv($string, 'tis620', 'utf8');
|
508 |
|
} elseif ($charset != "UTF-8") {
|
|
562 |
if (!is_UTF8($string)) {
|
509 |
563 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset);
|
|
564 |
} else {
|
510 |
565 |
}
|
511 |
|
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
|
512 |
|
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
|
|
566 |
|
|
567 |
// check if we really get UTF-8. We don't get UTF-8 if charset is ISO-8859-11 or GB2312 and mb_string AND iconv aren't available.
|
|
568 |
if (is_UTF8($string)) {
|
|
569 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
|
|
570 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
|
|
571 |
} else {
|
|
572 |
}
|
513 |
573 |
return($string);
|
514 |
574 |
}
|
515 |
575 |
|
|
576 |
/**
 * Check whether a string consists solely of well-formed UTF-8.
 *
 * Accepted are tab, LF, CR, printable ASCII (0x20-0x7E) and every valid
 * multi-byte UTF-8 sequence. Overlong encodings, UTF-16 surrogates, code
 * points above U+10FFFF and all other ASCII control bytes are rejected.
 * An empty string counts as valid.
 *
 * @param string $string Bytes to inspect.
 * @return int|false The preg_match() result: 1 if the whole string matches,
 *                   0 if it does not, false if the PCRE call itself fails.
 */
function is_UTF8 ($string) {
    // Assembled piece by piece for readability; the concatenated pattern is
    // the same single expression, anchored at both ends.
    $utf8_pattern = '%^(?:'
        . '[\x09\x0A\x0D\x20-\x7E]'              // ASCII: tab, LF, CR, printable
        . '|[\xC2-\xDF][\x80-\xBF]'              // 2-byte sequences (no overlongs)
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'          // 3-byte, excluding overlongs
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'   // straight 3-byte sequences
        . '|\xED[\x80-\x9F][\x80-\xBF]'          // 3-byte, excluding surrogates
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'       // 4-byte, planes 1-3
        . '|[\xF1-\xF3][\x80-\xBF]{3}'           // 4-byte, planes 4-15
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'       // 4-byte, plane 16
        . ')*$%xs';

    return preg_match($utf8_pattern, $string);
}
|
|
580 |
|
516 |
581 |
/**
 * Convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts.
 *
 * The input is first normalised with string_to_utf8() (defined elsewhere in
 * this file), optionally escaped with htmlspecialchars(), and then converted
 * from UTF-8 to $charset_out in a single step.
 *
 * @param string $string                   Text to convert.
 * @param string $charset_out              Target charset; an empty value falls
 *                                         back to 'ISO-8859-1'.
 * @param int    $convert_htmlspecialchars Pass 1 to run htmlspecialchars() on
 *                                         the text before the charset conversion.
 * @return string The converted text. It is returned without charset conversion
 *                when $charset_out is UTF-8 or when the intermediate string is
 *                not valid UTF-8.
 */
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
    $charset_out = strtoupper($charset_out);
    if ($charset_out == '') {
        $charset_out = 'ISO-8859-1';
    }

    $string = string_to_utf8($string);

    // Escape while the text is still UTF-8, i.e. before the charset conversion.
    if ($convert_htmlspecialchars == 1) {
        $string = htmlspecialchars($string);
    }

    // Convert exactly once, and only if we really hold UTF-8: feeding bytes
    // that are not valid UTF-8 into a UTF-8 -> $charset_out conversion would
    // corrupt them.
    if ($charset_out != 'UTF-8' && is_UTF8($string)) {
        $string = mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
    }

    return $string;
}
|
537 |
594 |
|
538 |
595 |
/**
 * Convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities.
 *
 * The input is normalised with string_to_utf8() (defined elsewhere in this
 * file), optionally escaped with htmlspecialchars(ENT_QUOTES), and then
 * converted from UTF-8 to HTML-ENTITIES.
 *
 * @param string $string                   Text to convert.
 * @param string $charset_in               Charset of the input; an empty value
 *                                         falls back to 'ISO-8859-1'.
 * @param int    $convert_htmlspecialchars Pass 1 (the default) to escape HTML
 *                                         special characters, quotes included.
 * @return string The text with non-ASCII characters expressed as entities, or
 *                the intermediate string unchanged when it is not valid UTF-8.
 */
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=1) {
    $charset_in = strtoupper($charset_in);
    if ($charset_in == "") {
        $charset_in = 'ISO-8859-1';
    }

    $string = string_to_utf8($string, $charset_in);

    if ($convert_htmlspecialchars == 1) {
        $string = htmlspecialchars($string, ENT_QUOTES);
    }

    // Only translate to entities when the string really is UTF-8; otherwise a
    // UTF-8 -> HTML-ENTITIES conversion would misread the bytes.
    if (is_UTF8($string)) {
        $string = mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
    }

    return $string;
}
|
551 |
608 |
|
552 |
609 |
/**
 * Translate latin/greek/cyrillic html-entities to their plain 7bit
 * equivalents and replace numbered entities by their hexadecimal code.
 *
 * Named entities are mapped through $conversion_array, which is expected to be
 * defined by WB_PATH/framework/convert.php (the file is included on every call
 * so that the array lands in this function's local scope). Each remaining
 * numbered entity "&#NNN;" is replaced by dechex(NNN), e.g. "&#228;" -> "e4".
 *
 * @param string $string Text containing html-entities.
 * @return string The text with entities replaced.
 */
function entities_to_7bit($string) {
    require(WB_PATH.'/framework/convert.php');
    $string = strtr($string, $conversion_array);

    // The former '/e' (eval) modifier was removed in PHP 7.0, where it makes
    // preg_replace() return NULL. A callback performs the same substitution
    // without evaluating code built from the input.
    $string = preg_replace_callback('/&#([0-9]+);/', 'entities_to_7bit_dechex', $string);

    return $string;
}

/**
 * Support function for entities_to_7bit(): turn one matched numbered entity
 * into the hexadecimal form of its number.
 *
 * A named function is used instead of a closure so the code keeps working on
 * PHP versions that predate anonymous functions.
 *
 * @param array $match preg match array; index 1 holds the decimal digits.
 * @return string Lower-case hexadecimal representation without prefix.
 */
function entities_to_7bit_dechex($match) {
    return dechex((int) $match[1]);
}
|
558 |
617 |
|
Added support for ISO-8859-6. WB now fully supports UTF-8 and all ISO charsets except ISO-2022-JP and ISO-2022-KR. These languages can only be used with UTF-8.