Revision 476
Added by Matthias over 17 years ago
functions.php | ||
---|---|---|
340 | 340 |
|
341 | 341 |
// Function as replacement for php's htmlspecialchars() |
342 | 342 |
function my_htmlspecialchars($string) { |
343 |
$string = strtr($string, array("<"=>"<", ">"=>">", "\""=>""", "\'"=>"'")); |
|
343 |
$string = preg_replace("/&(?=[#a-z0-9]+;)/i", "_x_", $string); |
|
344 |
$string = strtr($string, array("<"=>"<", ">"=>">", "&"=>"&", "\""=>""", "\'"=>"'")); |
|
345 |
$string = preg_replace("/_x_(?=[#a-z0-9]+;)/i", "&", $string); |
|
344 | 346 |
return($string); |
345 | 347 |
} |
346 | 348 |
|
... | ... | |
369 | 371 |
if ($charset_out == $charset_in) { |
370 | 372 |
return $string; |
371 | 373 |
} |
374 |
$use_iconv = true; |
|
375 |
$use_mbstring = true; |
|
376 |
if(version_compare(PHP_VERSION, "5.1.0", "<")) { |
|
377 |
$use_mbstring = false; // don't rely on mb_convert_encoding if php<5.1.0 |
|
378 |
$use_iconv = false; // don't rely on iconv neither |
|
379 |
} |
|
380 |
|
|
372 | 381 |
// try mb_convert_encoding(). This can handle to or from HTML-ENTITIES, too |
373 |
if (function_exists('mb_convert_encoding')) { |
|
382 |
if ($use_mbstring && function_exists('mb_convert_encoding')) {
|
|
374 | 383 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions |
375 | 384 |
if ($charset_in=='ISO-8859-11' || $charset_in=='GB2312') { |
376 |
if (function_exists('iconv')) { |
|
385 |
if ($use_iconv && function_exists('iconv')) {
|
|
377 | 386 |
$string = iconv($charset_in, 'UTF-8', $string); |
378 | 387 |
} |
379 | 388 |
else { |
... | ... | |
390 | 399 |
} |
391 | 400 |
if ($charset_out=='ISO-8859-11' || $charset_out=='GB2312') { |
392 | 401 |
$string=mb_convert_encoding($string, 'UTF-8', $charset_in); |
393 |
if (function_exists('iconv')) { |
|
402 |
if ($use_iconv && function_exists('iconv')) {
|
|
394 | 403 |
$string = iconv('UTF-8', $charset_out, $string); |
395 | 404 |
} |
396 | 405 |
else { |
... | ... | |
409 | 418 |
} |
410 | 419 |
|
411 | 420 |
// try iconv(). This can't handle to or from HTML-ENTITIES. |
412 |
if (function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) { |
|
421 |
if ($use_iconv && function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) {
|
|
413 | 422 |
$string = iconv($charset_in, $charset_out, $string); |
414 | 423 |
return $string; |
415 | 424 |
} |
416 | 425 |
|
417 |
// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation |
|
426 |
// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation if mb_convert_encoding isn't available
|
|
418 | 427 |
if (($charset_in=='HTML-ENTITIES' && $charset_out=='UTF-8') || ($charset_in=='UTF-8' && $charset_out=='HTML-ENTITIES')) { |
419 |
$named_to_numbered_entities=array( |
|
420 |
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤', |
|
421 |
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª', |
|
422 |
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯', |
|
423 |
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´', |
|
424 |
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹', |
|
425 |
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾', |
|
426 |
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€', |
|
427 |
'Á'=>'Á','á'=>'á','Â'=>'Â', |
|
428 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
429 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
430 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
431 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
432 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
433 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
434 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
435 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
436 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
437 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
438 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
439 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
440 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
441 |
'ø'=>'ø','þ'=>'þ'); |
|
442 |
$numbered_to_named_entities=array('Á'=>'Á','á'=>'á','Â'=>'Â', |
|
443 |
' '=>' ','¡'=>'¡','¢'=>'¢','£'=>'£','¤'=>'¤', |
|
444 |
'¥'=>'¥','¦'=>'¦','§'=>'§','¨'=>'¨','ª'=>'ª', |
|
445 |
'«'=>'«','¬'=>'¬','­'=>'­','®'=>'®','¯'=>'¯', |
|
446 |
'°'=>'°','±'=>'±','²'=>'²','³'=>'³','´'=>'´', |
|
447 |
'µ'=>'µ','¶'=>'¶','·'=>'·','¸'=>'¸','¹'=>'¹', |
|
448 |
'º'=>'º','»'=>'»','¼'=>'¼','½'=>'½','¾'=>'¾', |
|
449 |
'¿'=>'¿','÷'=>'÷','∅'=>'∅','€'=>'€', |
|
450 |
'â'=>'â','Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à', |
|
451 |
'Å'=>'Å','å'=>'å','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä', |
|
452 |
'ä'=>'ä','Ç'=>'Ç','ç'=>'ç','É'=>'É','é'=>'é', |
|
453 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','Ë'=>'Ë', |
|
454 |
'ë'=>'ë','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
455 |
'Ì'=>'Ì','ì'=>'ì','Ï'=>'Ï','ï'=>'ï','Ñ'=>'Ñ', |
|
456 |
'ñ'=>'ñ','Ó'=>'Ó','ó'=>'ó','Ô'=>'Ô','ô'=>'ô', |
|
457 |
'Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò','ò'=>'ò','Õ'=>'Õ', |
|
458 |
'õ'=>'õ','Ö'=>'Ö','ö'=>'ö','Š'=>'Š','š'=>'š', |
|
459 |
'ß'=>'ß','Ú'=>'Ú','ú'=>'ú','Û'=>'Û','û'=>'û', |
|
460 |
'Ù'=>'Ù','ù'=>'ù','Ü'=>'Ü','ü'=>'ü','Ý'=>'Ý', |
|
461 |
'ý'=>'ý','Ÿ'=>'Ÿ','ÿ'=>'ÿ','©'=>'©','®'=>'®', |
|
462 |
'Ð'=>'Ð','×'=>'×','Ø'=>'Ø','Þ'=>'Þ','ð'=>'ð', |
|
463 |
'ø'=>'ø','þ'=>'þ'); |
|
464 |
if ($charset_in == 'HTML-ENTITIES') { |
|
465 |
$string = strtr($string, $named_to_numbered_entities); |
|
466 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
467 |
} |
|
468 |
elseif ($charset_out == 'HTML-ENTITIES') { |
|
469 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
470 |
$char = ""; |
|
471 |
while (strlen($string) > 0) { |
|
472 |
preg_match("/^(.)(.*)$/su", $string, $match); |
|
473 |
if (strlen($match[1]) > 1) { |
|
474 |
$char .= "&#".uniord($match[1]).";"; |
|
475 |
} else $char .= $match[1]; |
|
476 |
$string = $match[2]; |
|
477 |
} |
|
478 |
$string = $char; |
|
479 |
$string = strtr($string, $numbered_to_named_entities); |
|
480 |
} |
|
428 |
$string = string_decode_encode_entities($string, $charset_out, $charset_in); |
|
481 | 429 |
return $string; |
482 | 430 |
} |
483 | 431 |
|
... | ... | |
530 | 478 |
// and mbstring _and_ iconv aren't available. |
531 | 479 |
return $string; |
532 | 480 |
} |
533 |
// support-function for mb_convert_encoding_wrapper() |
|
481 |
|
|
482 |
// Decodes or encodes html-entities. Works for utf-8 only! |
|
483 |
function string_decode_encode_entities($string, $out='HTML-ENTITIES', $in='UTF-8') { |
|
484 |
if(!(($in=='UTF-8' || $in=='HTML-ENTITIES') && ($out=='UTF-8' || $out=='HTML-ENTITIES'))) { |
|
485 |
return $string; |
|
486 |
} |
|
487 |
$named_to_numbered_entities=array( |
|
488 |
'Á'=>'Á','á'=>'á', |
|
489 |
'Â'=>'Â','â'=>'â','´'=>'´','Æ'=>'Æ','æ'=>'æ', |
|
490 |
'À'=>'À','à'=>'à','ℵ'=>'ℵ','Α'=>'Α','α'=>'α', |
|
491 |
'&'=>'&','∧'=>'∧','∠'=>'∠','''=>''','Å'=>'Å','å'=>'å', |
|
492 |
'≈'=>'≈','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä','ä'=>'ä', |
|
493 |
'„'=>'„','Β'=>'Β','β'=>'β','¦'=>'¦','•'=>'•', |
|
494 |
'∩'=>'∩','Ç'=>'Ç','ç'=>'ç','¸'=>'¸','¢'=>'¢', |
|
495 |
'Χ'=>'Χ','χ'=>'χ','ˆ'=>'ˆ','♣'=>'♣','≅'=>'≅', |
|
496 |
'©'=>'©','↵'=>'↵','∪'=>'∪','¤'=>'¤','‡'=>'‡', |
|
497 |
'†'=>'†','⇓'=>'⇓','↓'=>'↓','°'=>'°','Δ'=>'Δ', |
|
498 |
'δ'=>'δ','♦'=>'&v#9830;','÷'=>'÷','É'=>'É','é'=>'é', |
|
499 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','∅'=>'∅', |
|
500 |
' '=>' ',' '=>' ','Ε'=>'Ε','ε'=>'ε','≡'=>'≡', |
|
501 |
'Η'=>'Η','η'=>'η','Ð'=>'Ð','ð'=>'ð','Ë'=>'Ë','ë'=>'ë', |
|
502 |
'€'=>'€','∃'=>'∃','ƒ'=>'ƒ','∀'=>'∀','½'=>'½', |
|
503 |
'¼'=>'¼','¾'=>'¾','⁄'=>'⁄','Γ'=>'Γ','γ'=>'γ', |
|
504 |
'≥'=>'≥','>'=>'>','⇔'=>'⇔','↔'=>'↔','♥'=>'♥', |
|
505 |
'…'=>'…','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
506 |
'¡'=>'¡','Ì'=>'Ì','ì'=>'ì','ℑ'=>'ℑ','∞'=>'∞', |
|
507 |
'∫'=>'∫','Ι'=>'Ι','ι'=>'ι','¿'=>'¿','∈'=>'∈', |
|
508 |
'Ï'=>'Ï','ï'=>'ï','Κ'=>'Κ','κ'=>'κ','Λ'=>'Λ', |
|
509 |
'λ'=>'λ','⟨'=>'〈','«'=>'«','⇐'=>'⇐','←'=>'←', |
|
510 |
'⌈'=>'⌈','“'=>'“','≤'=>'≤','⌊'=>'⌊','∗'=>'∗', |
|
511 |
'◊'=>'◊','‎'=>'‎','‹'=>'‹','‘'=>'‘','<'=>'<', |
|
512 |
'¯'=>'¯','—'=>'—','µ'=>'µ','·'=>'·','−'=>'−', |
|
513 |
'Μ'=>'Μ','μ'=>'μ','∇'=>'∇',' '=>' ','–'=>'–', |
|
514 |
'≠'=>'≠','∋'=>'∋','¬'=>'¬','∉'=>'∉','⊄'=>'⊄', |
|
515 |
'Ñ'=>'Ñ','ñ'=>'ñ','Ν'=>'Ν','ν'=>'ν','Ó'=>'Ó', |
|
516 |
'ó'=>'ó','Ô'=>'Ô','ô'=>'ô','Œ'=>'Œ','œ'=>'œ', |
|
517 |
'Ò'=>'Ò','ò'=>'ò','‾'=>'‾','Ω'=>'Ω','ω'=>'ω', |
|
518 |
'Ο'=>'Ο','ο'=>'ο','⊕'=>'⊕','∨'=>'∨','ª'=>'ª', |
|
519 |
'º'=>'º','Ø'=>'Ø','ø'=>'ø','Õ'=>'Õ','õ'=>'õ', |
|
520 |
'⊗'=>'⊗','Ö'=>'Ö','ö'=>'ö','¶'=>'¶','∂'=>'∂', |
|
521 |
'‰'=>'‰','⊥'=>'⊥','Φ'=>'Φ','φ'=>'φ','Π'=>'Π', |
|
522 |
'π'=>'π','ϖ'=>'ϖ','±'=>'±','£'=>'£','″'=>'″', |
|
523 |
'′'=>'′','∏'=>'∏','∝'=>'∝','Ψ'=>'Ψ','ψ'=>'ψ', |
|
524 |
'"'=>'"','√'=>'√','⟩'=>'〉','»'=>'»','⇒'=>'⇒', |
|
525 |
'→'=>'→','⌉'=>'⌉','”'=>'”','ℜ'=>'ℜ','®'=>'®', |
|
526 |
'⌋'=>'⌋','Ρ'=>'Ρ','ρ'=>'ρ','‏'=>'‏','›'=>'›', |
|
527 |
'’'=>'’','‚'=>'‚','Š'=>'Š','š'=>'š','⋅'=>'⋅', |
|
528 |
'§'=>'§','­'=>'­','Σ'=>'Σ','σ'=>'σ','ς'=>'ς', |
|
529 |
'∼'=>'∼','♠'=>'♠','⊂'=>'⊂','⊆'=>'⊆','∑'=>'∑', |
|
530 |
'⊃'=>'⊃','¹'=>'¹','²'=>'²','³'=>'³','⊇'=>'⊇', |
|
531 |
'ß'=>'ß','Τ'=>'Τ','τ'=>'τ','∴'=>'∴','Θ'=>'Θ', |
|
532 |
'θ'=>'θ','ϑ'=>'ϑ',' '=>' ','Þ'=>'Þ','þ'=>'þ', |
|
533 |
'˜'=>'˜','×'=>'×','™'=>'™','Ú'=>'Ú','ú'=>'ú', |
|
534 |
'⇑'=>'⇑','↑'=>'↑','Û'=>'Û','û'=>'û','Ù'=>'Ù', |
|
535 |
'ù'=>'ù','¨'=>'¨','ϒ'=>'ϒ','Υ'=>'Υ','υ'=>'υ', |
|
536 |
'Ü'=>'Ü','ü'=>'ü','℘'=>'℘','Ξ'=>'Ξ','ξ'=>'ξ', |
|
537 |
'Ý'=>'Ý','ý'=>'ý','¥'=>'¥','Ÿ'=>'Ÿ','ÿ'=>'ÿ', |
|
538 |
'Ζ'=>'Ζ','ζ'=>'ζ','‍'=>'‍','‌'=>'‌' |
|
539 |
); |
|
540 |
$numbered_to_named_entities=array( |
|
541 |
'Á'=>'Á','á'=>'á','Â'=>'Â','â'=>'â','´'=>'´', |
|
542 |
'Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à','ℵ'=>'ℵ', |
|
543 |
'Α'=>'Α','α'=>'α','&'=>'&','∧'=>'∧','∠'=>'∠', |
|
544 |
'''=>''','Å'=>'Å','å'=>'å','≈'=>'≈','Ã'=>'Ã', |
|
545 |
'ã'=>'ã','Ä'=>'Ä','ä'=>'ä','„'=>'„','Β'=>'Β', |
|
546 |
'β'=>'β','¦'=>'¦','•'=>'•','∩'=>'∩','Ç'=>'Ç', |
|
547 |
'ç'=>'ç','¸'=>'¸','¢'=>'¢','Χ'=>'Χ','χ'=>'χ', |
|
548 |
'ˆ'=>'ˆ','♣'=>'♣','≅'=>'≅','©'=>'©','↵'=>'↵', |
|
549 |
'∪'=>'∪','¤'=>'¤','‡'=>'‡','†'=>'†','⇓'=>'⇓', |
|
550 |
'↓'=>'↓','°'=>'°','Δ'=>'Δ','δ'=>'δ','&v#9830;'=>'♦', |
|
551 |
'÷'=>'÷','É'=>'É','é'=>'é','Ê'=>'Ê','ê'=>'ê', |
|
552 |
'È'=>'È','è'=>'è','∅'=>'∅',' '=>' ',' '=>' ', |
|
553 |
'Ε'=>'Ε','ε'=>'ε','≡'=>'≡','Η'=>'Η','η'=>'η', |
|
554 |
'Ð'=>'Ð','ð'=>'ð','Ë'=>'Ë','ë'=>'ë','€'=>'€', |
|
555 |
'∃'=>'∃','ƒ'=>'ƒ','∀'=>'∀','½'=>'½','¼'=>'¼', |
|
556 |
'¾'=>'¾','⁄'=>'⁄','Γ'=>'Γ','γ'=>'γ','≥'=>'≥', |
|
557 |
'>'=>'>','⇔'=>'⇔','↔'=>'↔','♥'=>'♥','…'=>'…', |
|
558 |
'Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î','¡'=>'¡', |
|
559 |
'Ì'=>'Ì','ì'=>'ì','ℑ'=>'ℑ','∞'=>'∞','∫'=>'∫', |
|
560 |
'Ι'=>'Ι','ι'=>'ι','¿'=>'¿','∈'=>'∈','Ï'=>'Ï', |
|
561 |
'ï'=>'ï','Κ'=>'Κ','κ'=>'κ','Λ'=>'Λ','λ'=>'λ', |
|
562 |
'〈'=>'⟨','«'=>'«','⇐'=>'⇐','←'=>'←','⌈'=>'⌈', |
|
563 |
'“'=>'“','≤'=>'≤','⌊'=>'⌊','∗'=>'∗','◊'=>'◊', |
|
564 |
'‎'=>'‎','‹'=>'‹','‘'=>'‘','<'=>'<','¯'=>'¯', |
|
565 |
'—'=>'—','µ'=>'µ','·'=>'·','−'=>'−','Μ'=>'Μ', |
|
566 |
'μ'=>'μ','∇'=>'∇',' '=>' ','–'=>'–','≠'=>'≠', |
|
567 |
'∋'=>'∋','¬'=>'¬','∉'=>'∉','⊄'=>'⊄','Ñ'=>'Ñ', |
|
568 |
'ñ'=>'ñ','Ν'=>'Ν','ν'=>'ν','Ó'=>'Ó','ó'=>'ó', |
|
569 |
'Ô'=>'Ô','ô'=>'ô','Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò', |
|
570 |
'ò'=>'ò','‾'=>'‾','Ω'=>'Ω','ω'=>'ω','Ο'=>'Ο', |
|
571 |
'ο'=>'ο','⊕'=>'⊕','∨'=>'∨','ª'=>'ª','º'=>'º', |
|
572 |
'Ø'=>'Ø','ø'=>'ø','Õ'=>'Õ','õ'=>'õ','⊗'=>'⊗', |
|
573 |
'Ö'=>'Ö','ö'=>'ö','¶'=>'¶','∂'=>'∂','‰'=>'‰', |
|
574 |
'⊥'=>'⊥','Φ'=>'Φ','φ'=>'φ','Π'=>'Π','π'=>'π','ϖ'=>'ϖ', |
|
575 |
'±'=>'±','£'=>'£','″'=>'″','′'=>'′','∏'=>'∏', |
|
576 |
'∝'=>'∝','Ψ'=>'Ψ','ψ'=>'ψ','"'=>'"','√'=>'√', |
|
577 |
'〉'=>'⟩','»'=>'»','⇒'=>'⇒','→'=>'→','⌉'=>'⌉', |
|
578 |
'”'=>'”','ℜ'=>'ℜ','®'=>'®','⌋'=>'⌋','Ρ'=>'Ρ', |
|
579 |
'ρ'=>'ρ','‏'=>'‏','›'=>'›','’'=>'’','‚'=>'‚', |
|
580 |
'Š'=>'Š','š'=>'š','⋅'=>'⋅','§'=>'§','­'=>'­', |
|
581 |
'Σ'=>'Σ','σ'=>'σ','ς'=>'ς','∼'=>'∼','♠'=>'♠', |
|
582 |
'⊂'=>'⊂','⊆'=>'⊆','∑'=>'∑','⊃'=>'⊃','¹'=>'¹', |
|
583 |
'²'=>'²','³'=>'³','⊇'=>'⊇','ß'=>'ß','Τ'=>'Τ', |
|
584 |
'τ'=>'τ','∴'=>'∴','Θ'=>'Θ','θ'=>'θ','ϑ'=>'ϑ', |
|
585 |
' '=>' ','Þ'=>'Þ','þ'=>'þ','˜'=>'˜','×'=>'×', |
|
586 |
'™'=>'™','Ú'=>'Ú','ú'=>'ú','⇑'=>'⇑','↑'=>'↑', |
|
587 |
'Û'=>'Û','û'=>'û','Ù'=>'Ù','ù'=>'ù','¨'=>'¨', |
|
588 |
'ϒ'=>'ϒ','Υ'=>'Υ','υ'=>'υ','Ü'=>'Ü','ü'=>'ü', |
|
589 |
'℘'=>'℘','Ξ'=>'Ξ','ξ'=>'ξ','Ý'=>'Ý','ý'=>'ý', |
|
590 |
'¥'=>'¥','Ÿ'=>'Ÿ','ÿ'=>'ÿ','Ζ'=>'Ζ','ζ'=>'ζ','‍'=>'‍', |
|
591 |
'‌'=>'‌' |
|
592 |
); |
|
593 |
|
|
594 |
if ($in == 'HTML-ENTITIES') { |
|
595 |
$string = strtr($string, $named_to_numbered_entities); |
|
596 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
597 |
} |
|
598 |
elseif ($out == 'HTML-ENTITIES') { |
|
599 |
//$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string); |
|
600 |
$char = ""; |
|
601 |
while (strlen($string) > 0) { |
|
602 |
preg_match("/^(.)(.*)$/su", $string, $match); |
|
603 |
if (strlen($match[1]) > 1) { |
|
604 |
$char .= "&#".uniord($match[1]).";"; |
|
605 |
} else $char .= $match[1]; |
|
606 |
$string = $match[2]; |
|
607 |
} |
|
608 |
$string = $char; |
|
609 |
$string = strtr($string, $numbered_to_named_entities); |
|
610 |
} |
|
611 |
return $string; |
|
612 |
} |
|
613 |
|
|
614 |
// support-function for string_decode_encode_entities() |
|
534 | 615 |
function uniord($c) { |
535 | 616 |
$ud = 0; |
536 | 617 |
if (ord($c{0}) >= 0 && ord($c{0}) <= 127) $ud = ord($c{0}); |
... | ... | |
604 | 685 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET, $convert_htmlspecialchars=0) { |
605 | 686 |
$charset_out = strtoupper($charset_out); |
606 | 687 |
if ($charset_out == '') { $charset_out = 'ISO-8859-1'; } |
607 |
$string = string_to_utf8($string); |
|
608 |
if($charset_out!='UTF-8' && is_UTF8($string)) { |
|
688 |
$charset_in = strtoupper(DEFAULT_CHARSET); |
|
689 |
|
|
690 |
// string to utf-8 |
|
691 |
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') { |
|
692 |
if ($charset_in == 'ISO-8859-1') { |
|
693 |
$string=utf8_encode($string); |
|
694 |
} |
|
695 |
// decode html-entities |
|
696 |
if(preg_match("/&[#a-zA-Z0-9]+;/", $string)) { |
|
697 |
$string=string_decode_encode_entities($string, 'UTF-8', 'HTML-ENTITIES'); |
|
698 |
} |
|
699 |
} |
|
700 |
else { |
|
701 |
$string = string_to_utf8($string); // will decode html-entities, too. |
|
702 |
} |
|
703 |
// string to $charset_out |
|
704 |
if($charset_out == 'ISO-8859-1') { |
|
705 |
$string=utf8_decode($string); |
|
706 |
} |
|
707 |
elseif($charset_out != 'UTF-8' && is_UTF8($string)) { |
|
609 | 708 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
610 | 709 |
} |
611 |
return($string);
|
|
612 |
} |
|
710 |
return $string;
|
|
711 |
}
|
|
613 | 712 |
|
614 | 713 |
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities |
615 | 714 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET, $convert_htmlspecialchars=0) { |
616 | 715 |
$charset_in = strtoupper($charset_in); |
617 | 716 |
if ($charset_in == "") { $charset_in = 'ISO-8859-1'; } |
618 |
$string = string_to_utf8($string, $charset_in); |
|
619 |
if (is_UTF8($string)) { |
|
620 |
$string=mb_convert_encoding_wrapper($string,'HTML-ENTITIES','UTF-8'); |
|
717 |
|
|
718 |
// string to utf-8 |
|
719 |
if ($charset_in == 'ISO-8859-1' || $charset_in == 'UTF-8') { |
|
720 |
if ($charset_in == 'ISO-8859-1') { |
|
721 |
$string=utf8_encode($string); |
|
722 |
} |
|
723 |
// encode html-entities |
|
724 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); |
|
725 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
621 | 726 |
} |
622 |
return($string); |
|
727 |
else { |
|
728 |
$string = string_to_utf8($string, $charset_in); |
|
729 |
// encode html-entities |
|
730 |
if (is_UTF8($string)) { |
|
731 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); |
|
732 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
733 |
} |
|
734 |
} |
|
735 |
return $string; |
|
623 | 736 |
} |
624 | 737 |
|
625 | 738 |
// translate any latin/greek/cyrillic html-entities to their plain 7bit equivalents |
Also available in: Unified diff
Added a function which replaces mbstring, because mbstring seems not to work properly on some PHP 4 versions (thanks to thorn)