Revision 552
Added by thorn almost 18 years ago
| functions.php | ||
|---|---|---|
| 341 | 341 |
} |
| 342 | 342 |
|
| 343 | 343 |
// Function as replacement for php's htmlspecialchars() |
| 344 |
// Will not mangle HTML-entities |
|
| 344 | 345 |
function my_htmlspecialchars($string) {
|
| 345 |
$string = preg_replace("/&(?=[#a-z0-9]+;)/i", "_x_", $string);
|
|
| 346 |
$string = strtr($string, array("<"=>"<", ">"=>">", "&"=>"&", "\""=>""", "\'"=>"'"));
|
|
| 347 |
$string = preg_replace("/_x_(?=[#a-z0-9]+;)/i", "&", $string);
|
|
| 346 |
$string = preg_replace('/&(?=[#a-z0-9]+;)/i', '__amp;_', $string);
|
|
| 347 |
$string = strtr($string, array('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"', '\''=>'''));
|
|
| 348 |
$string = preg_replace('/__amp;_(?=[#a-z0-9]+;)/i', '&', $string);
|
|
| 348 | 349 |
return($string); |
| 349 | 350 |
} |
| 350 | 351 |
|
| 351 |
// Function to convert a string from $from- to $to-encoding, using mysql |
|
| 352 |
function my_mysql_iconv($string, $from, $to) {
|
|
| 353 |
// keep current character set values |
|
| 354 |
global $database; |
|
| 355 |
$query = $database->query("SELECT @@character_set_client");
|
|
| 356 |
if($query->numRows() > 0) {
|
|
| 357 |
$res = $query->fetchRow(); |
|
| 358 |
$character_set_database = $res['@@character_set_client']; |
|
| 359 |
} else { echo mysql_error()."\n<br />"; }
|
|
| 360 |
$query = $database->query("SELECT @@character_set_results");
|
|
| 361 |
if($query->numRows() > 0) {
|
|
| 362 |
$res = $query->fetchRow(); |
|
| 363 |
$character_set_results = $res['@@character_set_results']; |
|
| 364 |
} else { echo mysql_error()."\n<br />"; }
|
|
| 365 |
$query = $database->query("SELECT @@collation_connection");
|
|
| 366 |
if($query->numRows() > 0) {
|
|
| 367 |
$res = $query->fetchRow(); |
|
| 368 |
$collation_results = $res['@@collation_connection']; |
|
| 369 |
} else { echo mysql_error()."\n<br />"; }
|
|
| 370 |
// set new character set values |
|
| 371 |
$query = $database->query("SET character_set_client=$from");
|
|
| 372 |
$query = $database->query("SET character_set_results=$to");
|
|
| 373 |
$query = $database->query("SET collation_connection=utf8_unicode_ci");
|
|
| 374 |
$string_escaped = mysql_real_escape_string($string); |
|
| 375 |
// convert the string |
|
| 376 |
$query = $database->query("SELECT '$string_escaped'");
|
|
| 377 |
if($query->numRows() > 0) {
|
|
| 378 |
$res = $query->fetchRow(); |
|
| 379 |
$converted_string = $res[0]; |
|
| 380 |
} else { echo mysql_error()."\n<br />"; }
|
|
| 381 |
// restore previous character set values |
|
| 382 |
$query = $database->query("SET character_set_client=$character_set_database");
|
|
| 383 |
$query = $database->query("SET character_set_results=$character_set_results");
|
|
| 384 |
$query = $database->query("SET collation_connection=$collation_results");
|
|
| 385 |
return $converted_string; |
|
| 386 |
} |
|
| 387 |
|
|
| 388 |
// Function as wrapper for mb_convert_encoding |
|
| 389 |
// converts $charset_in to $charset_out or |
|
| 390 |
// UTF-8 to HTML-ENTITIES or HTML-ENTITIES to UTF-8 |
|
| 391 |
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
|
|
| 392 |
if ($charset_out == $charset_in) {
|
|
| 393 |
return $string; |
|
| 352 |
// init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) from being loaded more than once |
|
| 353 |
// functions and arrays from functions-utf8.php and charsets_table.php will be in global name-space |
|
| 354 |
function init_utf8funcs() {
|
|
| 355 |
static $utf8_ok=0; |
|
| 356 |
if($utf8_ok == 0) {
|
|
| 357 |
++$utf8_ok; |
|
| 358 |
// debug XXX to be removed |
|
| 359 |
if($utf8_ok > 1) |
|
| 360 |
trigger_error("init_utf8funcs: utf8_ok > 1", E_USER_ERROR);
|
|
| 361 |
// XXX remove end |
|
| 362 |
require_once(WB_PATH.'/framework/functions-utf8.php'); |
|
| 394 | 363 |
} |
| 395 |
$use_iconv = true; |
|
| 396 |
$use_mbstring = true; |
|
| 397 |
/* |
|
| 398 |
if(version_compare(PHP_VERSION, "5.1.0", "<")) {
|
|
| 399 |
$use_mbstring = false; // don't rely on mb_convert_encoding if php<5.1.0 |
|
| 400 |
$use_iconv = false; // don't rely on iconv neither |
|
| 401 |
} |
|
| 402 |
*/ |
|
| 403 |
|
|
| 404 |
// try mb_convert_encoding(). This can handle to or from HTML-ENTITIES, too |
|
| 405 |
if ($use_mbstring && function_exists('mb_convert_encoding')) {
|
|
| 406 |
// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions |
|
| 407 |
if ($charset_in=='ISO-8859-11' || $charset_in=='GB2312') {
|
|
| 408 |
if ($use_iconv && function_exists('iconv')) {
|
|
| 409 |
$string = iconv($charset_in, 'UTF-8', $string); |
|
| 410 |
} |
|
| 411 |
else {
|
|
| 412 |
if ($charset_in == 'GB2312') {
|
|
| 413 |
$string=my_mysql_iconv($string, 'gb2312', 'utf8'); |
|
| 414 |
} else {
|
|
| 415 |
$string=my_mysql_iconv($string, 'tis620', 'utf8'); |
|
| 416 |
} |
|
| 417 |
} |
|
| 418 |
$charset_in='UTF-8'; |
|
| 419 |
if ($charset_out == 'UTF-8') {
|
|
| 420 |
return $string; |
|
| 421 |
} |
|
| 422 |
} |
|
| 423 |
if ($charset_out=='ISO-8859-11' || $charset_out=='GB2312') {
|
|
| 424 |
$string=mb_convert_encoding($string, 'UTF-8', $charset_in); |
|
| 425 |
if ($use_iconv && function_exists('iconv')) {
|
|
| 426 |
$string = iconv('UTF-8', $charset_out, $string);
|
|
| 427 |
} |
|
| 428 |
else {
|
|
| 429 |
if ($charset_out == 'GB2312') {
|
|
| 430 |
$string=my_mysql_iconv($string, 'utf8', 'gb2312'); |
|
| 431 |
} else {
|
|
| 432 |
$string=my_mysql_iconv($string, 'utf8', 'tis620'); |
|
| 433 |
} |
|
| 434 |
} |
|
| 435 |
} else {
|
|
| 436 |
$string = strtr($string, array("<"=>"&_lt;", ">"=>"&_gt;", "&"=>"&_amp;", """=>"&_quot;", "'"=>"&_#39;"));
|
|
| 437 |
$string=mb_convert_encoding($string, $charset_out, $charset_in); |
|
| 438 |
$string = strtr($string, array("&_lt;"=>"<", "&_gt;"=>">", "&_amp;"=>"&", "&_quot;"=>""", "&_#39;"=>"'"));
|
|
| 439 |
} |
|
| 440 |
return $string; |
|
| 441 |
} |
|
| 442 |
|
|
| 443 |
// try iconv(). This can't handle to or from HTML-ENTITIES. |
|
| 444 |
if ($use_iconv && function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) {
|
|
| 445 |
$string = iconv($charset_in, $charset_out, $string); |
|
| 446 |
return $string; |
|
| 447 |
} |
|
| 448 |
|
|
| 449 |
// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation if mb_convert_encoding isn't available |
|
| 450 |
if (($charset_in=='HTML-ENTITIES' && $charset_out=='UTF-8') || ($charset_in=='UTF-8' && $charset_out=='HTML-ENTITIES')) {
|
|
| 451 |
$string = string_decode_encode_entities($string, $charset_out, $charset_in); |
|
| 452 |
return $string; |
|
| 453 |
} |
|
| 454 |
|
|
| 455 |
// mb_convert_encoding() and iconv() aren't available, so use my_mysql_iconv() |
|
| 456 |
if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; }
|
|
| 457 |
elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; }
|
|
| 458 |
elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; }
|
|
| 459 |
elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; }
|
|
| 460 |
elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; }
|
|
| 461 |
elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = ''; } //?
|
|
| 462 |
elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; }
|
|
| 463 |
elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; }
|
|
| 464 |
elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; }
|
|
| 465 |
elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; }
|
|
| 466 |
elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; }
|
|
| 467 |
elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = ''; } //?
|
|
| 468 |
elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = ''; } //?
|
|
| 469 |
elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; }
|
|
| 470 |
elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; }
|
|
| 471 |
elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; }
|
|
| 472 |
else { $mysqlcharset_from = 'latin1'; }
|
|
| 473 |
|
|
| 474 |
if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; }
|
|
| 475 |
elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; }
|
|
| 476 |
elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; }
|
|
| 477 |
elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; }
|
|
| 478 |
elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards
|
|
| 479 |
elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = ''; } //?
|
|
| 480 |
elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; }
|
|
| 481 |
elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; }
|
|
| 482 |
elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; }
|
|
| 483 |
elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; }
|
|
| 484 |
elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; }
|
|
| 485 |
elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = ''; } //?
|
|
| 486 |
elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = ''; } //?
|
|
| 487 |
elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; }
|
|
| 488 |
elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; }
|
|
| 489 |
elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; }
|
|
| 490 |
else { $mysqlcharset_to = 'latin1'; }
|
|
| 491 |
|
|
| 492 |
if ($mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) {
|
|
| 493 |
$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to); |
|
| 494 |
if ($mysqlcharset_to == 'cp1251') {
|
|
| 495 |
$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" ); |
|
| 496 |
} |
|
| 497 |
return($string); |
|
| 498 |
} |
|
| 499 |
|
|
| 500 |
// $string is unchanged. This will happen if we have to deal with ISO-8859-6 or ISO-2022-JP or -KR |
|
| 501 |
// and mbstring _and_ iconv aren't available. |
|
| 502 |
return $string; |
|
| 503 | 364 |
} |
| 504 | 365 |
|
| 505 |
// Decodes or encodes html-entities. Works for utf-8 only! |
|
| 506 |
function string_decode_encode_entities($string, $out='HTML-ENTITIES', $in='UTF-8') {
|
|
| 507 |
if(!(($in=='UTF-8' || $in=='HTML-ENTITIES') && ($out=='UTF-8' || $out=='HTML-ENTITIES'))) {
|
|
| 508 |
return $string; |
|
| 509 |
} |
|
| 510 |
$named_to_numbered_entities=array( |
|
| 511 |
'Á'=>'Á','á'=>'á', |
|
| 512 |
'Â'=>'Â','â'=>'â','´'=>'´','Æ'=>'Æ','æ'=>'æ', |
|
| 513 |
'À'=>'À','à'=>'à','ℵ'=>'ℵ','Α'=>'Α','α'=>'α', |
|
| 514 |
'∧'=>'∧','∠'=>'∠','''=>''','Å'=>'Å','å'=>'å', |
|
| 515 |
'≈'=>'≈','Ã'=>'Ã','ã'=>'ã','Ä'=>'Ä','ä'=>'ä', |
|
| 516 |
'„'=>'„','Β'=>'Β','β'=>'β','¦'=>'¦','•'=>'•', |
|
| 517 |
'∩'=>'∩','Ç'=>'Ç','ç'=>'ç','¸'=>'¸','¢'=>'¢', |
|
| 518 |
'Χ'=>'Χ','χ'=>'χ','ˆ'=>'ˆ','♣'=>'♣','≅'=>'≅', |
|
| 519 |
'©'=>'©','↵'=>'↵','∪'=>'∪','¤'=>'¤','‡'=>'‡', |
|
| 520 |
'†'=>'†','⇓'=>'⇓','↓'=>'↓','°'=>'°','Δ'=>'Δ', |
|
| 521 |
'δ'=>'δ','♦'=>'&v#9830;','÷'=>'÷','É'=>'É','é'=>'é', |
|
| 522 |
'Ê'=>'Ê','ê'=>'ê','È'=>'È','è'=>'è','∅'=>'∅', |
|
| 523 |
' '=>' ',' '=>' ','Ε'=>'Ε','ε'=>'ε','≡'=>'≡', |
|
| 524 |
'Η'=>'Η','η'=>'η','Ð'=>'Ð','ð'=>'ð','Ë'=>'Ë','ë'=>'ë', |
|
| 525 |
'€'=>'€','∃'=>'∃','ƒ'=>'ƒ','∀'=>'∀','½'=>'½', |
|
| 526 |
'¼'=>'¼','¾'=>'¾','⁄'=>'⁄','Γ'=>'Γ','γ'=>'γ', |
|
| 527 |
'≥'=>'≥','⇔'=>'⇔','↔'=>'↔','♥'=>'♥', |
|
| 528 |
'…'=>'…','Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î', |
|
| 529 |
'¡'=>'¡','Ì'=>'Ì','ì'=>'ì','ℑ'=>'ℑ','∞'=>'∞', |
|
| 530 |
'∫'=>'∫','Ι'=>'Ι','ι'=>'ι','¿'=>'¿','∈'=>'∈', |
|
| 531 |
'Ï'=>'Ï','ï'=>'ï','Κ'=>'Κ','κ'=>'κ','Λ'=>'Λ', |
|
| 532 |
'λ'=>'λ','⟨'=>'〈','«'=>'«','⇐'=>'⇐','←'=>'←', |
|
| 533 |
'⌈'=>'⌈','“'=>'“','≤'=>'≤','⌊'=>'⌊','∗'=>'∗', |
|
| 534 |
'◊'=>'◊','‎'=>'‎','‹'=>'‹','‘'=>'‘', |
|
| 535 |
'¯'=>'¯','—'=>'—','µ'=>'µ','·'=>'·','−'=>'−', |
|
| 536 |
'Μ'=>'Μ','μ'=>'μ','∇'=>'∇',' '=>' ','–'=>'–', |
|
| 537 |
'≠'=>'≠','∋'=>'∋','¬'=>'¬','∉'=>'∉','⊄'=>'⊄', |
|
| 538 |
'Ñ'=>'Ñ','ñ'=>'ñ','Ν'=>'Ν','ν'=>'ν','Ó'=>'Ó', |
|
| 539 |
'ó'=>'ó','Ô'=>'Ô','ô'=>'ô','Œ'=>'Œ','œ'=>'œ', |
|
| 540 |
'Ò'=>'Ò','ò'=>'ò','‾'=>'‾','Ω'=>'Ω','ω'=>'ω', |
|
| 541 |
'Ο'=>'Ο','ο'=>'ο','⊕'=>'⊕','∨'=>'∨','ª'=>'ª', |
|
| 542 |
'º'=>'º','Ø'=>'Ø','ø'=>'ø','Õ'=>'Õ','õ'=>'õ', |
|
| 543 |
'⊗'=>'⊗','Ö'=>'Ö','ö'=>'ö','¶'=>'¶','∂'=>'∂', |
|
| 544 |
'‰'=>'‰','⊥'=>'⊥','Φ'=>'Φ','φ'=>'φ','Π'=>'Π', |
|
| 545 |
'π'=>'π','ϖ'=>'ϖ','±'=>'±','£'=>'£','″'=>'″', |
|
| 546 |
'′'=>'′','∏'=>'∏','∝'=>'∝','Ψ'=>'Ψ','ψ'=>'ψ', |
|
| 547 |
'"'=>'"','√'=>'√','⟩'=>'〉','»'=>'»','⇒'=>'⇒', |
|
| 548 |
'→'=>'→','⌉'=>'⌉','”'=>'”','ℜ'=>'ℜ','®'=>'®', |
|
| 549 |
'⌋'=>'⌋','Ρ'=>'Ρ','ρ'=>'ρ','‏'=>'‏','›'=>'›', |
|
| 550 |
'’'=>'’','‚'=>'‚','Š'=>'Š','š'=>'š','⋅'=>'⋅', |
|
| 551 |
'§'=>'§','­'=>'­','Σ'=>'Σ','σ'=>'σ','ς'=>'ς', |
|
| 552 |
'∼'=>'∼','♠'=>'♠','⊂'=>'⊂','⊆'=>'⊆','∑'=>'∑', |
|
| 553 |
'⊃'=>'⊃','¹'=>'¹','²'=>'²','³'=>'³','⊇'=>'⊇', |
|
| 554 |
'ß'=>'ß','Τ'=>'Τ','τ'=>'τ','∴'=>'∴','Θ'=>'Θ', |
|
| 555 |
'θ'=>'θ','ϑ'=>'ϑ',' '=>' ','Þ'=>'Þ','þ'=>'þ', |
|
| 556 |
'˜'=>'˜','×'=>'×','™'=>'™','Ú'=>'Ú','ú'=>'ú', |
|
| 557 |
'⇑'=>'⇑','↑'=>'↑','Û'=>'Û','û'=>'û','Ù'=>'Ù', |
|
| 558 |
'ù'=>'ù','¨'=>'¨','ϒ'=>'ϒ','Υ'=>'Υ','υ'=>'υ', |
|
| 559 |
'Ü'=>'Ü','ü'=>'ü','℘'=>'℘','Ξ'=>'Ξ','ξ'=>'ξ', |
|
| 560 |
'Ý'=>'Ý','ý'=>'ý','¥'=>'¥','Ÿ'=>'Ÿ','ÿ'=>'ÿ', |
|
| 561 |
'Ζ'=>'Ζ','ζ'=>'ζ','‍'=>'‍','‌'=>'‌' |
|
| 562 |
); |
|
| 563 |
$numbered_to_named_entities=array( |
|
| 564 |
'Á'=>'Á','á'=>'á','Â'=>'Â','â'=>'â','´'=>'´', |
|
| 565 |
'Æ'=>'Æ','æ'=>'æ','À'=>'À','à'=>'à','ℵ'=>'ℵ', |
|
| 566 |
'Α'=>'Α','α'=>'α','∧'=>'∧','∠'=>'∠', |
|
| 567 |
'''=>''','Å'=>'Å','å'=>'å','≈'=>'≈','Ã'=>'Ã', |
|
| 568 |
'ã'=>'ã','Ä'=>'Ä','ä'=>'ä','„'=>'„','Β'=>'Β', |
|
| 569 |
'β'=>'β','¦'=>'¦','•'=>'•','∩'=>'∩','Ç'=>'Ç', |
|
| 570 |
'ç'=>'ç','¸'=>'¸','¢'=>'¢','Χ'=>'Χ','χ'=>'χ', |
|
| 571 |
'ˆ'=>'ˆ','♣'=>'♣','≅'=>'≅','©'=>'©','↵'=>'↵', |
|
| 572 |
'∪'=>'∪','¤'=>'¤','‡'=>'‡','†'=>'†','⇓'=>'⇓', |
|
| 573 |
'↓'=>'↓','°'=>'°','Δ'=>'Δ','δ'=>'δ','&v#9830;'=>'♦', |
|
| 574 |
'÷'=>'÷','É'=>'É','é'=>'é','Ê'=>'Ê','ê'=>'ê', |
|
| 575 |
'È'=>'È','è'=>'è','∅'=>'∅',' '=>' ',' '=>' ', |
|
| 576 |
'Ε'=>'Ε','ε'=>'ε','≡'=>'≡','Η'=>'Η','η'=>'η', |
|
| 577 |
'Ð'=>'Ð','ð'=>'ð','Ë'=>'Ë','ë'=>'ë','€'=>'€', |
|
| 578 |
'∃'=>'∃','ƒ'=>'ƒ','∀'=>'∀','½'=>'½','¼'=>'¼', |
|
| 579 |
'¾'=>'¾','⁄'=>'⁄','Γ'=>'Γ','γ'=>'γ','≥'=>'≥', |
|
| 580 |
'⇔'=>'⇔','↔'=>'↔','♥'=>'♥','…'=>'…', |
|
| 581 |
'Í'=>'Í','í'=>'í','Î'=>'Î','î'=>'î','¡'=>'¡', |
|
| 582 |
'Ì'=>'Ì','ì'=>'ì','ℑ'=>'ℑ','∞'=>'∞','∫'=>'∫', |
|
| 583 |
'Ι'=>'Ι','ι'=>'ι','¿'=>'¿','∈'=>'∈','Ï'=>'Ï', |
|
| 584 |
'ï'=>'ï','Κ'=>'Κ','κ'=>'κ','Λ'=>'Λ','λ'=>'λ', |
|
| 585 |
'〈'=>'⟨','«'=>'«','⇐'=>'⇐','←'=>'←','⌈'=>'⌈', |
|
| 586 |
'“'=>'“','≤'=>'≤','⌊'=>'⌊','∗'=>'∗','◊'=>'◊', |
|
| 587 |
'‎'=>'‎','‹'=>'‹','‘'=>'‘','¯'=>'¯', |
|
| 588 |
'—'=>'—','µ'=>'µ','·'=>'·','−'=>'−','Μ'=>'Μ', |
|
| 589 |
'μ'=>'μ','∇'=>'∇',' '=>' ','–'=>'–','≠'=>'≠', |
|
| 590 |
'∋'=>'∋','¬'=>'¬','∉'=>'∉','⊄'=>'⊄','Ñ'=>'Ñ', |
|
| 591 |
'ñ'=>'ñ','Ν'=>'Ν','ν'=>'ν','Ó'=>'Ó','ó'=>'ó', |
|
| 592 |
'Ô'=>'Ô','ô'=>'ô','Œ'=>'Œ','œ'=>'œ','Ò'=>'Ò', |
|
| 593 |
'ò'=>'ò','‾'=>'‾','Ω'=>'Ω','ω'=>'ω','Ο'=>'Ο', |
|
| 594 |
'ο'=>'ο','⊕'=>'⊕','∨'=>'∨','ª'=>'ª','º'=>'º', |
|
| 595 |
'Ø'=>'Ø','ø'=>'ø','Õ'=>'Õ','õ'=>'õ','⊗'=>'⊗', |
|
| 596 |
'Ö'=>'Ö','ö'=>'ö','¶'=>'¶','∂'=>'∂','‰'=>'‰', |
|
| 597 |
'⊥'=>'⊥','Φ'=>'Φ','φ'=>'φ','Π'=>'Π','π'=>'π','ϖ'=>'ϖ', |
|
| 598 |
'±'=>'±','£'=>'£','″'=>'″','′'=>'′','∏'=>'∏', |
|
| 599 |
'∝'=>'∝','Ψ'=>'Ψ','ψ'=>'ψ','"'=>'"','√'=>'√', |
|
| 600 |
'〉'=>'⟩','»'=>'»','⇒'=>'⇒','→'=>'→','⌉'=>'⌉', |
|
| 601 |
'”'=>'”','ℜ'=>'ℜ','®'=>'®','⌋'=>'⌋','Ρ'=>'Ρ', |
|
| 602 |
'ρ'=>'ρ','‏'=>'‏','›'=>'›','’'=>'’','‚'=>'‚', |
|
| 603 |
'Š'=>'Š','š'=>'š','⋅'=>'⋅','§'=>'§','­'=>'­', |
|
| 604 |
'Σ'=>'Σ','σ'=>'σ','ς'=>'ς','∼'=>'∼','♠'=>'♠', |
|
| 605 |
'⊂'=>'⊂','⊆'=>'⊆','∑'=>'∑','⊃'=>'⊃','¹'=>'¹', |
|
| 606 |
'²'=>'²','³'=>'³','⊇'=>'⊇','ß'=>'ß','Τ'=>'Τ', |
|
| 607 |
'τ'=>'τ','∴'=>'∴','Θ'=>'Θ','θ'=>'θ','ϑ'=>'ϑ', |
|
| 608 |
' '=>' ','Þ'=>'Þ','þ'=>'þ','˜'=>'˜','×'=>'×', |
|
| 609 |
'™'=>'™','Ú'=>'Ú','ú'=>'ú','⇑'=>'⇑','↑'=>'↑', |
|
| 610 |
'Û'=>'Û','û'=>'û','Ù'=>'Ù','ù'=>'ù','¨'=>'¨', |
|
| 611 |
'ϒ'=>'ϒ','Υ'=>'Υ','υ'=>'υ','Ü'=>'Ü','ü'=>'ü', |
|
| 612 |
'℘'=>'℘','Ξ'=>'Ξ','ξ'=>'ξ','Ý'=>'Ý','ý'=>'ý', |
|
| 613 |
'¥'=>'¥','Ÿ'=>'Ÿ','ÿ'=>'ÿ','Ζ'=>'Ζ','ζ'=>'ζ','‍'=>'‍', |
|
| 614 |
'‌'=>'‌' |
|
| 615 |
); |
|
| 616 |
|
|
| 617 |
if ($in == 'HTML-ENTITIES') {
|
|
| 618 |
$string = strtr($string, $named_to_numbered_entities); |
|
| 619 |
$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
|
|
| 620 |
} |
|
| 621 |
elseif ($out == 'HTML-ENTITIES') {
|
|
| 622 |
$char = ""; |
|
| 623 |
$i=0; |
|
| 624 |
$len=strlen($string); |
|
| 625 |
if($len==0) return $string; |
|
| 626 |
do {
|
|
| 627 |
if(ord($string{$i}) <= 127) $ud = $string{$i++};
|
|
| 628 |
elseif(ord($string{$i}) <= 223) $ud = (ord($string{$i++})-192)*64 + (ord($string{$i++})-128);
|
|
| 629 |
elseif(ord($string{$i}) <= 239) $ud = (ord($string{$i++})-224)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128);
|
|
| 630 |
elseif(ord($string{$i}) <= 247) $ud = (ord($string{$i++})-240)*262144 + (ord($string{$i++})-128)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128);
|
|
| 631 |
elseif(ord($string{$i}) <= 251) $ud = ord($string{$i++}); // error!
|
|
| 632 |
if($ud > 127) {
|
|
| 633 |
$char .= "&#$ud;"; |
|
| 634 |
} else {
|
|
| 635 |
$char .= $ud; |
|
| 636 |
} |
|
| 637 |
} while($i < $len); |
|
| 638 |
$string = $char; |
|
| 639 |
$string = strtr($string, $numbered_to_named_entities); |
|
| 640 |
// do ' and " |
|
| 641 |
$string = strtr($string, array('\''=>''', '\"'=>'"'));
|
|
| 642 |
} |
|
| 643 |
return $string; |
|
| 366 |
// Convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts |
|
| 367 |
// Will replace all numeric and named entities except &gt; &lt; &apos; &quot; &#039; |
|
| 368 |
// In case of error the returned string is unchanged, and a message is emitted. |
|
| 369 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET) {
|
|
| 370 |
//init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) from being loaded more than once |
|
| 371 |
init_utf8funcs(); |
|
| 372 |
return entities_to_umlauts2($string, $charset_out); |
|
| 644 | 373 |
} |
| 645 | 374 |
|
| 646 |
// support-function for string_decode_encode_entities() |
|
| 647 |
function code_to_utf8($num) {
|
|
| 648 |
if ($num <= 0x7F) {
|
|
| 649 |
return chr($num); |
|
| 650 |
} elseif ($num <= 0x7FF) {
|
|
| 651 |
return chr(($num >> 6) + 192) . chr(($num & 63) + 128); |
|
| 652 |
} elseif ($num <= 0xFFFF) {
|
|
| 653 |
return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128); |
|
| 654 |
} elseif ($num <= 0x1FFFFF) {
|
|
| 655 |
return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128); |
|
| 656 |
} |
|
| 657 |
return " "; |
|
| 658 |
} |
|
| 659 |
|
|
| 660 |
// Function to convert a string from mixed html-entities/umlauts to pure utf-8-umlauts |
|
| 661 |
function string_to_utf8($string, $charset=DEFAULT_CHARSET) {
|
|
| 662 |
$charset = strtoupper($charset); |
|
| 663 |
if ($charset == '') { $charset = 'ISO-8859-1'; }
|
|
| 664 |
|
|
| 665 |
if (!is_UTF8($string)) {
|
|
| 666 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset); |
|
| 667 |
} |
|
| 668 |
// check if we really get UTF-8. We don't get UTF-8 if charset is ISO-8859-6 or ISO-2022-JP/KR |
|
| 669 |
// and mb_string AND iconv aren't available. |
|
| 670 |
if (is_UTF8($string)) {
|
|
| 671 |
$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 672 |
$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
|
| 673 |
} else {
|
|
| 674 |
// nothing we can do here :-( |
|
| 675 |
} |
|
| 676 |
return($string); |
|
| 677 |
} |
|
| 678 |
|
|
| 679 |
// function to check if a string is UTF-8 |
|
| 680 |
function is_UTF8 ($str) {
|
|
| 681 |
if (strlen($str) < 4000) {
|
|
| 682 |
// see http://bugs.php.net/bug.php?id=24460 and http://bugs.php.net/bug.php?id=27070 and http://ilia.ws/archives/5-Top-10-ways-to-crash-PHP.html for this. |
|
| 683 |
// 4000 works for me ... |
|
| 684 |
return preg_match('/^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/s', $str);
|
|
| 685 |
} else {
|
|
| 686 |
$isUTF8 = true; |
|
| 687 |
while($str{0}) {
|
|
| 688 |
if (preg_match("/^[\x09\x0A\x0D\x20-\x7E]/", $str)) { $str = substr($str, 1); continue; }
|
|
| 689 |
if (preg_match("/^[\xC2-\xDF][\x80-\xBF]/", $str)) { $str = substr($str, 2); continue; }
|
|
| 690 |
if (preg_match("/^\xE0[\xA0-\xBF][\x80-\xBF]/", $str)) { $str = substr($str, 3); continue; }
|
|
| 691 |
if (preg_match("/^[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}/", $str)) { $str = substr($str, 3); continue; }
|
|
| 692 |
if (preg_match("/^\xED[\x80-\x9F][\x80-\xBF]/", $str)) { $str = substr($str, 3); continue; }
|
|
| 693 |
if (preg_match("/^\xF0[\x90-\xBF][\x80-\xBF]{2}/", $str)) { $str = substr($str, 4); continue; }
|
|
| 694 |
if (preg_match("/^[\xF1-\xF3][\x80-\xBF]{3}/", $str)) { $str = substr($str, 4); continue; }
|
|
| 695 |
if (preg_match("/^\xF4[\x80-\x8F][\x80-\xBF]{2}/", $str)) { $str = substr($str, 4); continue; }
|
|
| 696 |
if (preg_match("/^$/", $str)) { break; }
|
|
| 697 |
$isUTF8 = false; |
|
| 698 |
break; |
|
| 699 |
} |
|
| 700 |
return ($isUTF8); |
|
| 701 |
} |
|
| 702 |
} |
|
| 703 |
|
|
| 704 |
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts |
|
| 705 |
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET) {
|
|
| 706 |
$charset_out = strtoupper($charset_out); |
|
| 707 |
if ($charset_out == '') { $charset_out = 'ISO-8859-1'; }
|
|
| 708 |
$charset_in = strtoupper(DEFAULT_CHARSET); |
|
| 709 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
| 710 |
global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8; |
|
| 711 |
global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11; |
|
| 712 |
|
|
| 713 |
// string to utf-8, entities_to_utf8 |
|
| 714 |
if (substr($charset_in,0,8) == 'ISO-8859' || $charset_in == 'UTF-8') {
|
|
| 715 |
if ($charset_in == 'ISO-8859-1') {
|
|
| 716 |
$string=utf8_encode($string); |
|
| 717 |
} elseif ($charset_in == 'ISO-8859-2') {
|
|
| 718 |
$string = strtr($string, $iso_8859_2_to_utf8); |
|
| 719 |
} elseif ($charset_in == 'ISO-8859-3') {
|
|
| 720 |
$string = strtr($string, $iso_8859_3_to_utf8); |
|
| 721 |
} elseif ($charset_in == 'ISO-8859-4') {
|
|
| 722 |
$string = strtr($string, $iso_8859_4_to_utf8); |
|
| 723 |
} elseif ($charset_in == 'ISO-8859-5') {
|
|
| 724 |
$string = strtr($string, $iso_8859_5_to_utf8); |
|
| 725 |
} elseif ($charset_in == 'ISO-8859-6') {
|
|
| 726 |
$string = strtr($string, $iso_8859_6_to_utf8); |
|
| 727 |
} elseif ($charset_in == 'ISO-8859-7') {
|
|
| 728 |
$string = strtr($string, $iso_8859_7_to_utf8); |
|
| 729 |
} elseif ($charset_in == 'ISO-8859-8') {
|
|
| 730 |
$string = strtr($string, $iso_8859_8_to_utf8); |
|
| 731 |
} elseif ($charset_in == 'ISO-8859-9') {
|
|
| 732 |
$string = strtr($string, $iso_8859_9_to_utf8); |
|
| 733 |
} elseif ($charset_in == 'ISO-8859-10') {
|
|
| 734 |
$string = strtr($string, $iso_8859_10_to_utf8); |
|
| 735 |
} elseif ($charset_in == 'ISO-8859-11') {
|
|
| 736 |
$string = strtr($string, $iso_8859_11_to_utf8); |
|
| 737 |
} |
|
| 738 |
// decode html-entities |
|
| 739 |
if(preg_match("/&[#a-zA-Z0-9]+;/", $string)) {
|
|
| 740 |
$string=string_decode_encode_entities($string, 'UTF-8', 'HTML-ENTITIES'); |
|
| 741 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); // alternative to string_decode_encode_entities() |
|
| 742 |
//$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES'); |
|
| 743 |
} |
|
| 744 |
} |
|
| 745 |
else {
|
|
| 746 |
$string = string_to_utf8($string); // will decode html-entities, too. |
|
| 747 |
} |
|
| 748 |
// string to $charset_out |
|
| 749 |
if($charset_out == 'ISO-8859-1') {
|
|
| 750 |
$string=utf8_decode($string); |
|
| 751 |
} elseif($charset_out == 'ISO-8859-2') {
|
|
| 752 |
$string = strtr($string, $utf8_to_iso_8859_2); |
|
| 753 |
} elseif($charset_out == 'ISO-8859-3') {
|
|
| 754 |
$string = strtr($string, $utf8_to_iso_8859_3); |
|
| 755 |
} elseif($charset_out == 'ISO-8859-4') {
|
|
| 756 |
$string = strtr($string, $utf8_to_iso_8859_4); |
|
| 757 |
} elseif($charset_out == 'ISO-8859-5') {
|
|
| 758 |
$string = strtr($string, $utf8_to_iso_8859_5); |
|
| 759 |
} elseif($charset_out == 'ISO-8859-6') {
|
|
| 760 |
$string = strtr($string, $utf8_to_iso_8859_6); |
|
| 761 |
} elseif($charset_out == 'ISO-8859-7') {
|
|
| 762 |
$string = strtr($string, $utf8_to_iso_8859_7); |
|
| 763 |
} elseif($charset_out == 'ISO-8859-8') {
|
|
| 764 |
$string = strtr($string, $utf8_to_iso_8859_8); |
|
| 765 |
} elseif($charset_out == 'ISO-8859-9') {
|
|
| 766 |
$string = strtr($string, $utf8_to_iso_8859_9); |
|
| 767 |
} elseif($charset_out == 'ISO-8859-10') {
|
|
| 768 |
$string = strtr($string, $utf8_to_iso_8859_10); |
|
| 769 |
} elseif($charset_out == 'ISO-8859-11') {
|
|
| 770 |
$string = strtr($string, $utf8_to_iso_8859_11); |
|
| 771 |
} elseif($charset_out != 'UTF-8') {
|
|
| 772 |
if(is_UTF8($string)) {
|
|
| 773 |
$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8'); |
|
| 774 |
} |
|
| 775 |
} |
|
| 776 |
return $string; |
|
| 777 |
} |
|
| 778 |
|
|
| 779 |
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities |
|
| 375 |
// Will convert a string in $charset_in encoding to a pure ASCII string with HTML-entities. |
|
| 376 |
// In case of error the returned string is unchanged, and a message is emitted. |
|
| 780 | 377 |
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET) {
|
| 781 |
$charset_in = strtoupper($charset_in); |
|
| 782 |
if ($charset_in == "") { $charset_in = 'ISO-8859-1'; }
|
|
| 783 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
| 784 |
global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8; |
|
| 785 |
|
|
| 786 |
// string to utf-8, umlauts_to_entities |
|
| 787 |
if ($charset_in == 'UTF-8' || substr($charset_in,0,8) == 'ISO-8859') {
|
|
| 788 |
if ($charset_in == 'ISO-8859-1') {
|
|
| 789 |
$string=utf8_encode($string); |
|
| 790 |
} elseif ($charset_in == 'ISO-8859-2') {
|
|
| 791 |
$string = strtr($string, $iso_8859_2_to_utf8); |
|
| 792 |
} elseif ($charset_in == 'ISO-8859-3') {
|
|
| 793 |
$string = strtr($string, $iso_8859_3_to_utf8); |
|
| 794 |
} elseif ($charset_in == 'ISO-8859-4') {
|
|
| 795 |
$string = strtr($string, $iso_8859_4_to_utf8); |
|
| 796 |
} elseif ($charset_in == 'ISO-8859-5') {
|
|
| 797 |
$string = strtr($string, $iso_8859_5_to_utf8); |
|
| 798 |
} elseif ($charset_in == 'ISO-8859-6') {
|
|
| 799 |
$string = strtr($string, $iso_8859_6_to_utf8); |
|
| 800 |
} elseif ($charset_in == 'ISO-8859-7') {
|
|
| 801 |
$string = strtr($string, $iso_8859_7_to_utf8); |
|
| 802 |
} elseif ($charset_in == 'ISO-8859-8') {
|
|
| 803 |
$string = strtr($string, $iso_8859_8_to_utf8); |
|
| 804 |
} elseif ($charset_in == 'ISO-8859-9') {
|
|
| 805 |
$string = strtr($string, $iso_8859_9_to_utf8); |
|
| 806 |
} elseif ($charset_in == 'ISO-8859-10') {
|
|
| 807 |
$string = strtr($string, $iso_8859_10_to_utf8); |
|
| 808 |
} elseif ($charset_in == 'ISO-8859-11') {
|
|
| 809 |
$string = strtr($string, $iso_8859_11_to_utf8); |
|
| 810 |
} |
|
| 811 |
// encode html-entities |
|
| 812 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 813 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 814 |
} |
|
| 815 |
else {
|
|
| 816 |
$string = string_to_utf8($string, $charset_in); |
|
| 817 |
// encode html-entities |
|
| 818 |
if (is_UTF8($string)) {
|
|
| 819 |
$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 820 |
//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); |
|
| 821 |
} |
|
| 822 |
} |
|
| 823 |
return $string; |
|
| 378 |
//init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) from being loaded more than once |
|
| 379 |
init_utf8funcs(); |
|
| 380 |
return umlauts_to_entities2($string, $charset_in); |
|
| 824 | 381 |
} |
| 825 | 382 |
|
| 826 |
function umlauts_to_defcharset($string, $charset) {
|
|
| 827 |
$charset_out = strtoupper(DEFAULT_CHARSET); |
|
| 828 |
if ($charset_out == "") { $charset_out = 'ISO-8859-1'; }
|
|
| 829 |
require_once(WB_PATH.'/framework/charsets_table.php'); |
|
| 830 |
global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11; |
|
| 831 |
|
|
| 832 |
if($charset_out == $charset) {
|
|
| 833 |
return $string; |
|
| 834 |
} |
|
| 835 |
|
|
| 836 |
if($charset == 'UTF-8') {
|
|
| 837 |
if($charset_out == 'ISO-8859-1') {
|
|
| 838 |
$string = utf8_decode($string); |
|
| 839 |
} elseif ($charset_out == 'ISO-8859-2') {
|
|
| 840 |
$string = strtr($string, $utf8_to_iso_8859_2); |
|
| 841 |
} elseif ($charset_out == 'ISO-8859-3') {
|
|
| 842 |
$string = strtr($string, $utf8_to_iso_8859_3); |
|
| 843 |
} elseif ($charset_out == 'ISO-8859-4') {
|
|
| 844 |
$string = strtr($string, $utf8_to_iso_8859_4); |
|
| 845 |
} elseif ($charset_out == 'ISO-8859-5') {
|
|
| 846 |
$string = strtr($string, $utf8_to_iso_8859_5); |
|
| 847 |
} elseif ($charset_out == 'ISO-8859-6') {
|
|
| 848 |
$string = strtr($string, $utf8_to_iso_8859_6); |
|
| 849 |
} elseif ($charset_out == 'ISO-8859-7') {
|
|
| 850 |
$string = strtr($string, $utf8_to_iso_8859_7); |
|
| 851 |
} elseif ($charset_out == 'ISO-8859-8') {
|
|
| 852 |
$string = strtr($string, $utf8_to_iso_8859_8); |
|
| 853 |
} elseif ($charset_out == 'ISO-8859-9') {
|
|
| 854 |
$string = strtr($string, $utf8_to_iso_8859_9); |
|
| 855 |
} elseif ($charset_out == 'ISO-8859-10') {
|
|
| 856 |
$string = strtr($string, $utf8_to_iso_8859_10); |
|
| 857 |
} elseif ($charset_out == 'ISO-8859-11') {
|
|
| 858 |
$string = strtr($string, $utf8_to_iso_8859_11); |
|
| 859 |
} |
|
| 860 |
else {
|
|
| 861 |
$string=mb_convert_encoding_wrapper($string, $charset_out, $charset); |
|
| 862 |
} |
|
| 863 |
} |
|
| 864 |
else {
|
|
| 865 |
$string=mb_convert_encoding_wrapper($string, $charset_out, $charset); |
|
| 866 |
} |
|
| 867 |
|
|
| 868 |
return $string; |
|
| 869 |
} |
|
| 870 |
|
|
| 871 |
// translate any latin/greek/cyrillic html-entities to their plain 7bit equivalents |
|
| 872 |
// and numbered-entities into hex |
|
| 873 |
function entities_to_7bit($string) {
|
|
| 874 |
require(WB_PATH.'/framework/convert.php'); |
|
| 875 |
$string = strtr($string, $conversion_array); |
|
| 876 |
$string = preg_replace('/&#([0-9]+);/e', "dechex('$1')", $string);
|
|
| 877 |
return($string); |
|
| 878 |
} |
|
| 879 |
|
|
| 880 | 383 |
// Function to convert a page title to a page filename |
| 881 | 384 |
function page_filename($string) {
|
| 882 |
$string = entities_to_7bit(umlauts_to_entities($string)); |
|
| 385 |
//init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) from being loaded more than once |
|
| 386 |
init_utf8funcs(); |
|
| 387 |
$string = entities_to_7bit($string); |
|
| 883 | 388 |
// Now replace spaces with page spacer |
| 884 | 389 |
$string = trim($string); |
| 885 | 390 |
$string = preg_replace('/(\s)+/', PAGE_SPACER, $string);
|
| ... | ... | |
| 903 | 408 |
|
| 904 | 409 |
// Function to convert a desired media filename to a clean filename |
| 905 | 410 |
function media_filename($string) {
|
| 906 |
$string = entities_to_7bit(umlauts_to_entities($string)); |
|
| 411 |
//init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) from being loaded more than once |
|
| 412 |
init_utf8funcs(); |
|
| 413 |
$string = entities_to_7bit($string); |
|
| 907 | 414 |
// Now remove all bad characters |
| 908 | 415 |
$bad = array( |
| 909 | 416 |
'\'', // ' |
Also available in: Unified diff
added new module-based search-function and publish-by-date code