340 |
340 |
|
341 |
341 |
// Function as replacement for PHP's htmlspecialchars()
|
342 |
342 |
function my_htmlspecialchars($string) {
|
343 |
|
$string = strtr($string, array("<"=>"<", ">"=>">", "\""=>""", "\'"=>"'"));
|
|
343 |
$string = preg_replace("/&(?=[#a-z0-9]+;)/i", "_x_", $string);
|
|
344 |
$string = strtr($string, array("<"=>"<", ">"=>">", "&"=>"&", "\""=>""", "\'"=>"'"));
|
|
345 |
$string = preg_replace("/_x_(?=[#a-z0-9]+;)/i", "&", $string);
|
344 |
346 |
return($string);
|
345 |
347 |
}
|
346 |
348 |
|
... | ... | |
369 |
371 |
if ($charset_out == $charset_in) {
|
370 |
372 |
return $string;
|
371 |
373 |
}
|
|
374 |
$use_iconv = true;
|
|
375 |
$use_mbstring = true;
|
|
376 |
if(version_compare(PHP_VERSION, "5.1.0", "<")) {
|
|
377 |
$use_mbstring = false; // don't rely on mb_convert_encoding if php<5.1.0
|
|
378 |
$use_iconv = false; // don't rely on iconv neither
|
|
379 |
}
|
|
380 |
|
372 |
381 |
// try mb_convert_encoding(). This can handle to or from HTML-ENTITIES, too
|
373 |
|
if (function_exists('mb_convert_encoding')) {
|
|
382 |
if ($use_mbstring && function_exists('mb_convert_encoding')) {
|
374 |
383 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions
|
375 |
384 |
if ($charset_in=='ISO-8859-11' || $charset_in=='GB2312') {
|
376 |
|
if (function_exists('iconv')) {
|
|
385 |
if ($use_iconv && function_exists('iconv')) {
|
377 |
386 |
$string = iconv($charset_in, 'UTF-8', $string);
|
378 |
387 |
}
|
379 |
388 |
else {
|
... | ... | |
390 |
399 |
}
|
391 |
400 |
if ($charset_out=='ISO-8859-11' || $charset_out=='GB2312') {
|
392 |
401 |
$string=mb_convert_encoding($string, 'UTF-8', $charset_in);
|
393 |
|
if (function_exists('iconv')) {
|
|
402 |
if ($use_iconv && function_exists('iconv')) {
|
394 |
403 |
$string = iconv('UTF-8', $charset_out, $string);
|
395 |
404 |
}
|
396 |
405 |
else {
|
... | ... | |
409 |
418 |
}
|
410 |
419 |
|
411 |
420 |
// try iconv(). This can't handle to or from HTML-ENTITIES.
|
412 |
|
if (function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) {
|
|
421 |
if ($use_iconv && function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) {
|
413 |
422 |
$string = iconv($charset_in, $charset_out, $string);
|
414 |
423 |
return $string;
|
415 |
424 |
}
|
416 |
425 |
|
417 |
|
// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation
|
|
426 |
// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation if mb_convert_encoding isn't available
|
418 |
427 |
if (($charset_in=='HTML-ENTITIES' && $charset_out=='UTF-8') || ($charset_in=='UTF-8' && $charset_out=='HTML-ENTITIES')) {
|
419 |
|
$named_to_numbered_entities=array(
|
420 |
|
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤',
|
421 |
|
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª',
|
422 |
|
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯',
|
423 |
|
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´',
|
424 |
|
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹',
|
425 |
|
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾',
|
426 |
|
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€',
|
427 |
|
'Á'=>'Á','á'=>'á','Â'=>'Â',
|
428 |
|
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à',
|
429 |
|
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä',
|
430 |
|
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é',
|
431 |
|
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë',
|
432 |
|
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î',
|
433 |
|
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ',
|
434 |
|
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô',
|
435 |
|
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ',
|
436 |
|
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š',
|
437 |
|
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û',
|
438 |
|
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý',
|
439 |
|
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®',
|
440 |
|
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð',
|
441 |
|
'ø'=>'ø','þ'=>'þ');
|
442 |
|
$numbered_to_named_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â',
|
443 |
|
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤',
|
444 |
|
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª',
|
445 |
|
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯',
|
446 |
|
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´',
|
447 |
|
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹',
|
448 |
|
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾',
|
449 |
|
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€',
|
450 |
|
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à',
|
451 |
|
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä',
|
452 |
|
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é',
|
453 |
|
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë',
|
454 |
|
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î',
|
455 |
|
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ',
|
456 |
|
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô',
|
457 |
|
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ',
|
458 |
|
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š',
|
459 |
|
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û',
|
460 |
|
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý',
|
461 |
|
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®',
|
462 |
|
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð',
|
463 |
|
'ø'=>'ø','þ'=>'þ');
|
464 |
|
if ($charset_in == 'HTML-ENTITIES') {
|
465 |
|
$string = strtr($string, $named_to_numbered_entities);
|
466 |
|
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
467 |
|
}
|
468 |
|
elseif ($charset_out == 'HTML-ENTITIES') {
|
469 |
|
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
470 |
|
$char = "";
|
471 |
|
while (strlen($string) > 0) {
|
472 |
|
preg_match("/^(.)(.*)$/su", $string, $match);
|
473 |
|
if (strlen($match[1]) > 1) {
|
474 |
|
$char .= "&#".uniord($match[1]).";";
|
475 |
|
} else $char .= $match[1];
|
476 |
|
$string = $match[2];
|
477 |
|
}
|
478 |
|
$string = $char;
|
479 |
|
$string = strtr($string, $numbered_to_named_entities);
|
480 |
|
}
|
|
428 |
$string = string_decode_encode_entities($string, $charset_out, $charset_in);
|
481 |
429 |
return $string;
|
482 |
430 |
}
|
483 |
431 |
|
... | ... | |
530 |
478 |
// and mbstring _and_ iconv aren't available.
|
531 |
479 |
return $string;
|
532 |
480 |
}
|
533 |
|
// support-function for mb_convert_encoding_wrapper()
|
|
481 |
|
|
482 |
// Decodes or encodes html-entities. Works for utf-8 only!
|
|
483 |
function string_decode_encode_entities($string, $out='HTML-ENTITIES', $in='UTF-8') {
|
|
484 |
if(!(($in=='UTF-8' || $in=='HTML-ENTITIES') && ($out=='UTF-8' || $out=='HTML-ENTITIES'))) {
|
|
485 |
return $string;
|
|
486 |
}
|
|
487 |
$named_to_numbered_entities=array(
|
|
488 |
'Á'=>'Á','á'=>'á',
|
|
489 |
'Â'=>'Â','â'=>'â','´'=>'´','Æ'=>'Æ','æ'=>'æ',
|
|
490 |
'À'=>'À','à'=>'à','ℵ'=>'ℵ','Α'=>'Α','α'=>'α',
|
|
491 |
'&'=>'&','∧'=>'∧','∠'=>'∠','''=>''','Å'=>'Å','å'=>'å',
|
|
492 |
'≈'=>'≈','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä','ä'=>'ä',
|
|
493 |
'„'=>'„','Β'=>'Β','β'=>'β','¦'=>'¦','•'=>'•',
|
|
494 |
'∩'=>'∩','Ç'=>'Ç','ç'=>'ç','¸'=>'¸','¢'=>'¢',
|
|
495 |
'Χ'=>'Χ','χ'=>'χ','ˆ'=>'ˆ','♣'=>'♣','≅'=>'≅',
|
|
496 |
'©'=>'©','↵'=>'↵','∪'=>'∪','¤'=>'¤','‡'=>'‡',
|
|
497 |
'†'=>'†','⇓'=>'⇓','↓'=>'↓','°'=>'°','Δ'=>'Δ',
|
|
498 |
'δ'=>'δ','♦'=>'&v#9830;','÷'=>'÷','É'=>'É','é'=>'é',
|
|
499 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','∅'=>'∅',
|
|
500 |
' '=>' ',' '=>' ','Ε'=>'Ε','ε'=>'ε','≡'=>'≡',
|
|
501 |
'Η'=>'Η','η'=>'η','Ð'=>'Ð','ð'=>'ð','Ë'=>'Ë','ë'=>'ë',
|
|
502 |
'€'=>'€','∃'=>'∃','ƒ'=>'ƒ','∀'=>'∀','½'=>'½',
|
|
503 |
'¼'=>'¼','¾'=>'¾','⁄'=>'⁄','Γ'=>'Γ','γ'=>'γ',
|
|
504 |
'≥'=>'≥','>'=>'>','⇔'=>'⇔','↔'=>'↔','♥'=>'♥',
|
|
505 |
'…'=>'…','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î',
|
|
506 |
'¡'=>'¡','Ì'=>'Ì','ì'=>'ì','ℑ'=>'ℑ','∞'=>'∞',
|
|
507 |
'∫'=>'∫','Ι'=>'Ι','ι'=>'ι','¿'=>'¿','∈'=>'∈',
|
|
508 |
'Ï'=>'Ï','ï'=>'ï','Κ'=>'Κ','κ'=>'κ','Λ'=>'Λ',
|
|
509 |
'λ'=>'λ','⟨'=>'〈','«'=>'«','⇐'=>'⇐','←'=>'←',
|
|
510 |
'⌈'=>'⌈','“'=>'“','≤'=>'≤','⌊'=>'⌊','∗'=>'∗',
|
|
511 |
'◊'=>'◊','‎'=>'‎','‹'=>'‹','‘'=>'‘','<'=>'<',
|
|
512 |
'¯'=>'¯','—'=>'—','µ'=>'µ','·'=>'·','−'=>'−',
|
|
513 |
'Μ'=>'Μ','μ'=>'μ','∇'=>'∇',' '=>' ','–'=>'–',
|
|
514 |
'≠'=>'≠','∋'=>'∋','¬'=>'¬','∉'=>'∉','⊄'=>'⊄',
|
|
515 |
'Ñ'=>'Ñ','ñ'=>'ñ','Ν'=>'Ν','ν'=>'ν','Ó'=>'Ó',
|
|
516 |
'ó'=>'ó','Ô'=>'Ô','ô'=>'ô','Œ'=>'Œ','œ'=>'œ',
|
|
517 |
'Ò'=>'Ò','ò'=>'ò','‾'=>'‾','Ω'=>'Ω','ω'=>'ω',
|
|
518 |
'Ο'=>'Ο','ο'=>'ο','⊕'=>'⊕','∨'=>'∨','ª'=>'ª',
|
|
519 |
'º'=>'º','Ø'=>'Ø','ø'=>'ø','Õ'=>'Õ','õ'=>'õ',
|
|
520 |
'⊗'=>'⊗','Ö'=>'Ö','ö'=>'ö','¶'=>'¶','∂'=>'∂',
|
|
521 |
'‰'=>'‰','⊥'=>'⊥','Φ'=>'Φ','φ'=>'φ','Π'=>'Π',
|
|
522 |
'π'=>'π','ϖ'=>'ϖ','±'=>'±','£'=>'£','″'=>'″',
|
|
523 |
'′'=>'′','∏'=>'∏','∝'=>'∝','Ψ'=>'Ψ','ψ'=>'ψ',
|
|
524 |
'"'=>'"','√'=>'√','⟩'=>'〉','»'=>'»','⇒'=>'⇒',
|
|
525 |
'→'=>'→','⌉'=>'⌉','”'=>'”','ℜ'=>'ℜ','®'=>'®',
|
|
526 |
'⌋'=>'⌋','Ρ'=>'Ρ','ρ'=>'ρ','‏'=>'‏','›'=>'›',
|
|
527 |
'’'=>'’','‚'=>'‚','Š'=>'Š','š'=>'š','⋅'=>'⋅',
|
|
528 |
'§'=>'§','­'=>'­','Σ'=>'Σ','σ'=>'σ','ς'=>'ς',
|
|
529 |
'∼'=>'∼','♠'=>'♠','⊂'=>'⊂','⊆'=>'⊆','∑'=>'∑',
|
|
530 |
'⊃'=>'⊃','¹'=>'¹','²'=>'²','³'=>'³','⊇'=>'⊇',
|
|
531 |
'ß'=>'ß','Τ'=>'Τ','τ'=>'τ','∴'=>'∴','Θ'=>'Θ',
|
|
532 |
'θ'=>'θ','ϑ'=>'ϑ',' '=>' ','Þ'=>'Þ','þ'=>'þ',
|
|
533 |
'˜'=>'˜','×'=>'×','™'=>'™','Ú'=>'Ú','ú'=>'ú',
|
|
534 |
'⇑'=>'⇑','↑'=>'↑','Û'=>'Û','û'=>'û','Ù'=>'Ù',
|
|
535 |
'ù'=>'ù','¨'=>'¨','ϒ'=>'ϒ','Υ'=>'Υ','υ'=>'υ',
|
|
536 |
'Ü'=>'Ü','ü'=>'ü','℘'=>'℘','Ξ'=>'Ξ','ξ'=>'ξ',
|
|
537 |
'Ý'=>'Ý','ý'=>'ý','¥'=>'¥','Ÿ'=>'Ÿ','ÿ'=>'ÿ',
|
|
538 |
'Ζ'=>'Ζ','ζ'=>'ζ','‍'=>'‍','‌'=>'‌'
|
|
539 |
);
|
|
540 |
$numbered_to_named_entities=array(
|
|
541 |
'Á'=>'Á','á'=>'á','Â'=>'Â','â'=>'â','´'=>'´',
|
|
542 |
'Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à','ℵ'=>'ℵ',
|
|
543 |
'Α'=>'Α','α'=>'α','&'=>'&','∧'=>'∧','∠'=>'∠',
|
|
544 |
'''=>''','Å'=>'Å','å'=>'å','≈'=>'≈','Ã'=>'Ã',
|
|
545 |
'ã'=>'ã','Ä'=>'Ä','ä'=>'ä','„'=>'„','Β'=>'Β',
|
|
546 |
'β'=>'β','¦'=>'¦','•'=>'•','∩'=>'∩','Ç'=>'Ç',
|
|
547 |
'ç'=>'ç','¸'=>'¸','¢'=>'¢','Χ'=>'Χ','χ'=>'χ',
|
|
548 |
'ˆ'=>'ˆ','♣'=>'♣','≅'=>'≅','©'=>'©','↵'=>'↵',
|
|
549 |
'∪'=>'∪','¤'=>'¤','‡'=>'‡','†'=>'†','⇓'=>'⇓',
|
|
550 |
'↓'=>'↓','°'=>'°','Δ'=>'Δ','δ'=>'δ','&v#9830;'=>'♦',
|
|
551 |
'÷'=>'÷','É'=>'É','é'=>'é','Ê'=>'Ê','ê'=>'ê',
|
|
552 |
'È'=>'È','è'=>'è','∅'=>'∅',' '=>' ',' '=>' ',
|
|
553 |
'Ε'=>'Ε','ε'=>'ε','≡'=>'≡','Η'=>'Η','η'=>'η',
|
|
554 |
'Ð'=>'Ð','ð'=>'ð','Ë'=>'Ë','ë'=>'ë','€'=>'€',
|
|
555 |
'∃'=>'∃','ƒ'=>'ƒ','∀'=>'∀','½'=>'½','¼'=>'¼',
|
|
556 |
'¾'=>'¾','⁄'=>'⁄','Γ'=>'Γ','γ'=>'γ','≥'=>'≥',
|
|
557 |
'>'=>'>','⇔'=>'⇔','↔'=>'↔','♥'=>'♥','…'=>'…',
|
|
558 |
'Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î','¡'=>'¡',
|
|
559 |
'Ì'=>'Ì','ì'=>'ì','ℑ'=>'ℑ','∞'=>'∞','∫'=>'∫',
|
|
560 |
'Ι'=>'Ι','ι'=>'ι','¿'=>'¿','∈'=>'∈','Ï'=>'Ï',
|
|
561 |
'ï'=>'ï','Κ'=>'Κ','κ'=>'κ','Λ'=>'Λ','λ'=>'λ',
|
|
562 |
'〈'=>'⟨','«'=>'«','⇐'=>'⇐','←'=>'←','⌈'=>'⌈',
|
|
563 |
'“'=>'“','≤'=>'≤','⌊'=>'⌊','∗'=>'∗','◊'=>'◊',
|
|
564 |
'‎'=>'‎','‹'=>'‹','‘'=>'‘','<'=>'<','¯'=>'¯',
|
|
565 |
'—'=>'—','µ'=>'µ','·'=>'·','−'=>'−','Μ'=>'Μ',
|
|
566 |
'μ'=>'μ','∇'=>'∇',' '=>' ','–'=>'–','≠'=>'≠',
|
|
567 |
'∋'=>'∋','¬'=>'¬','∉'=>'∉','⊄'=>'⊄','Ñ'=>'Ñ',
|
|
568 |
'ñ'=>'ñ','Ν'=>'Ν','ν'=>'ν','Ó'=>'Ó','ó'=>'ó',
|
|
569 |
'Ô'=>'Ô','ô'=>'ô','Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò',
|
|
570 |
'ò'=>'ò','‾'=>'‾','Ω'=>'Ω','ω'=>'ω','Ο'=>'Ο',
|
|
571 |
'ο'=>'ο','⊕'=>'⊕','∨'=>'∨','ª'=>'ª','º'=>'º',
|
|
572 |
'Ø'=>'Ø','ø'=>'ø','Õ'=>'Õ','õ'=>'õ','⊗'=>'⊗',
|
|
573 |
'Ö'=>'Ö','ö'=>'ö','¶'=>'¶','∂'=>'∂','‰'=>'‰',
|
|
574 |
'⊥'=>'⊥','Φ'=>'Φ','φ'=>'φ','Π'=>'Π','π'=>'π','ϖ'=>'ϖ',
|
|
575 |
'±'=>'±','£'=>'£','″'=>'″','′'=>'′','∏'=>'∏',
|
|
576 |
'∝'=>'∝','Ψ'=>'Ψ','ψ'=>'ψ','"'=>'"','√'=>'√',
|
|
577 |
'〉'=>'⟩','»'=>'»','⇒'=>'⇒','→'=>'→','⌉'=>'⌉',
|
|
578 |
'”'=>'”','ℜ'=>'ℜ','®'=>'®','⌋'=>'⌋','Ρ'=>'Ρ',
|
|
579 |
'ρ'=>'ρ','‏'=>'‏','›'=>'›','’'=>'’','‚'=>'‚',
|
|
580 |
'Š'=>'Š','š'=>'š','⋅'=>'⋅','§'=>'§','­'=>'­',
|
|
581 |
'Σ'=>'Σ','σ'=>'σ','ς'=>'ς','∼'=>'∼','♠'=>'♠',
|
|
582 |
'⊂'=>'⊂','⊆'=>'⊆','∑'=>'∑','⊃'=>'⊃','¹'=>'¹',
|
|
583 |
'²'=>'²','³'=>'³','⊇'=>'⊇','ß'=>'ß','Τ'=>'Τ',
|
|
584 |
'τ'=>'τ','∴'=>'∴','Θ'=>'Θ','θ'=>'θ','ϑ'=>'ϑ',
|
|
585 |
' '=>' ','Þ'=>'Þ','þ'=>'þ','˜'=>'˜','×'=>'×',
|
|
586 |
'™'=>'™','Ú'=>'Ú','ú'=>'ú','⇑'=>'⇑','↑'=>'↑',
|
|
587 |
'Û'=>'Û','û'=>'û','Ù'=>'Ù','ù'=>'ù','¨'=>'¨',
|
|
588 |
'ϒ'=>'ϒ','Υ'=>'Υ','υ'=>'υ','Ü'=>'Ü','ü'=>'ü',
|
|
589 |
'℘'=>'℘','Ξ'=>'Ξ','ξ'=>'ξ','Ý'=>'Ý','ý'=>'ý',
|
|
590 |
'¥'=>'¥','Ÿ'=>'Ÿ','ÿ'=>'ÿ','Ζ'=>'Ζ','ζ'=>'ζ','‍'=>'‍',
|
|
591 |
'‌'=>'‌'
|
|
592 |
);
|
|
593 |
|
|
594 |
if ($in == 'HTML-ENTITIES') {
|
|
595 |
$string = strtr($string, $named_to_numbered_entities);
|
|
596 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
597 |
}
|
|
598 |
elseif ($out == 'HTML-ENTITIES') {
|
|
599 |
//$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
600 |
$char = "";
|
|
601 |
while (strlen($string) > 0) {
|
|
602 |
preg_match("/^(.)(.*)$/su", $string, $match);
|
|
603 |
if (strlen($match[1]) > 1) {
|
|
604 |
$char .= "&#".uniord($match[1]).";";
|
|
605 |
} else $char .= $match[1];
|
|
606 |
$string = $match[2];
|
|
607 |
}
|
|
608 |
$string = $char;
|
|
609 |
$string = strtr($string, $numbered_to_named_entities);
|
|
610 |
}
|
|
611 |
return $string;
|
|
612 |
}
|
|
613 |
|
|
614 |
// support-function for string_decode_encode_entities()
|
534 |
615 |
function uniord($c) {
|
535 |
616 |
$ud = 0;
|
536 |
617 |
if (ord($c{0}) >= 0 && ord($c{0}) <= 127) $ud = ord($c{0});
|
... | ... | |
604 |
685 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
605 |
686 |
$charset_out = strtoupper($charset_out);
|
606 |
687 |
if ($charset_out == '') { $charset_out = 'ISO-8859-1'; }
|
607 |
|
$string = string_to_utf8($string);
|
608 |
|
if($charset_out!='UTF-8' && is_UTF8($string)) {
|
|
688 |
$charset_in = strtoupper(DEFAULT_CHARSET);
|
|
689 |
|
|
690 |
// string to utf-8
|
|
691 |
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') {
|
|
692 |
if ($charset_in == 'ISO-8859-1') {
|
|
693 |
$string=utf8_encode($string);
|
|
694 |
}
|
|
695 |
// decode html-entities
|
|
696 |
if(preg_match("/&[#a-zA-Z0-9]+;/", $string)) {
|
|
697 |
$string=string_decode_encode_entities($string, 'UTF-8', 'HTML-ENTITIES');
|
|
698 |
}
|
|
699 |
}
|
|
700 |
else {
|
|
701 |
$string = string_to_utf8($string); // will decode html-entities, too.
|
|
702 |
}
|
|
703 |
// string to $charset_out
|
|
704 |
if($charset_out == 'ISO-8859-1') {
|
|
705 |
$string=utf8_decode($string);
|
|
706 |
}
|
|
707 |
elseif($charset_out != 'UTF-8' && is_UTF8($string)) {
|
609 |
708 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
|
610 |
709 |
}
|
611 |
|
return($string);
|
612 |
|
}
|
|
710 |
return $string;
|
|
711 |
}
|
613 |
712 |
|
614 |
713 |
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities
|
615 |
714 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=0) {
|
616 |
715 |
$charset_in = strtoupper($charset_in);
|
617 |
716 |
if ($charset_in == "") { $charset_in = 'ISO-8859-1'; }
|
618 |
|
$string = string_to_utf8($string, $charset_in);
|
619 |
|
if (is_UTF8($string)) {
|
620 |
|
$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8');
|
|
717 |
|
|
718 |
// string to utf-8
|
|
719 |
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') {
|
|
720 |
if ($charset_in == 'ISO-8859-1') {
|
|
721 |
$string=utf8_encode($string);
|
|
722 |
}
|
|
723 |
// encode html-entities
|
|
724 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8');
|
|
725 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
|
621 |
726 |
}
|
622 |
|
return($string);
|
|
727 |
else {
|
|
728 |
$string = string_to_utf8($string, $charset_in);
|
|
729 |
// encode html-entities
|
|
730 |
if (is_UTF8($string)) {
|
|
731 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8');
|
|
732 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
|
|
733 |
}
|
|
734 |
}
|
|
735 |
return $string;
|
623 |
736 |
}
|
624 |
737 |
|
625 |
738 |
// translate any latin/greek/cyrillic html-entities to their plain 7bit equivalents
|
Added a function which replaces mbstring, because mbstring does not seem to work properly on some PHP4 versions (thanks to thorn)