Project

General

Profile

« Previous | Next » 

Revision 552

Added by thorn almost 17 years ago

added new module-based search-function and publish-by-date code

View differences:

functions.php
341 341
}
342 342

  
343 343
// Function as replacement for php's htmlspecialchars()
344
// Will not mangle HTML-entities
344 345
function my_htmlspecialchars($string) {
345
	$string = preg_replace("/&(?=[#a-z0-9]+;)/i", "_x_", $string);
346
	$string = strtr($string, array("<"=>"&lt;", ">"=>"&gt;", "&"=>"&amp;", "\""=>"&quot;", "\'"=>"&#39;"));
347
	$string = preg_replace("/_x_(?=[#a-z0-9]+;)/i", "&", $string);
346
	$string = preg_replace('/&(?=[#a-z0-9]+;)/i', '__amp;_', $string);
347
	$string = strtr($string, array('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;', '\''=>'&#39;'));
348
	$string = preg_replace('/__amp;_(?=[#a-z0-9]+;)/i', '&', $string);
348 349
	return($string);
349 350
}
350 351

  
351
// Function to convert a string from $from- to $to-encoding, using mysql
352
function my_mysql_iconv($string, $from, $to) {
353
	// keep current character set values
354
	global $database;
355
	$query = $database->query("SELECT @@character_set_client");
356
	if($query->numRows() > 0) {
357
		$res = $query->fetchRow();
358
		$character_set_database = $res['@@character_set_client'];
359
	}	else { echo mysql_error()."\n<br />"; }
360
	$query = $database->query("SELECT @@character_set_results");
361
	if($query->numRows() > 0) {
362
		$res = $query->fetchRow();
363
		$character_set_results = $res['@@character_set_results'];
364
	}	else { echo mysql_error()."\n<br />"; }
365
	$query = $database->query("SELECT @@collation_connection");
366
	if($query->numRows() > 0) {
367
		$res = $query->fetchRow();
368
		$collation_results = $res['@@collation_connection'];
369
	}	else { echo mysql_error()."\n<br />"; }
370
	// set new character set values
371
	$query = $database->query("SET character_set_client=$from");
372
	$query = $database->query("SET character_set_results=$to");
373
	$query = $database->query("SET collation_connection=utf8_unicode_ci");
374
	$string_escaped = mysql_real_escape_string($string);
375
	// convert the string
376
	$query = $database->query("SELECT '$string_escaped'");
377
	if($query->numRows() > 0) {
378
		$res = $query->fetchRow();
379
		$converted_string = $res[0];
380
	}	else { echo mysql_error()."\n<br />"; }
381
	// restore previous character set values
382
	$query = $database->query("SET character_set_client=$character_set_database");
383
	$query = $database->query("SET character_set_results=$character_set_results");
384
	$query = $database->query("SET collation_connection=$collation_results");
385
	return $converted_string;
386
}
387

  
388
// Function as wrapper for mb_convert_encoding
389
// converts $charset_in to $charset_out or 
390
// UTF-8 to HTML-ENTITIES or HTML-ENTITIES to UTF-8
391
function mb_convert_encoding_wrapper($string, $charset_out, $charset_in) {
392
	if ($charset_out == $charset_in) {
393
		return $string;
352
// init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) from being loaded more than once
353
// functions and arrays from functions-utf8.php and charsets_table.php will be in global name-space
354
function init_utf8funcs() {
355
	static $utf8_ok=0;
356
	if($utf8_ok == 0) {
357
		++$utf8_ok;
358
		// debug XXX to be removed
359
		if($utf8_ok > 1)
360
			trigger_error("init_utf8funcs: utf8_ok > 1", E_USER_ERROR);
361
		// XXX remove end
362
		require_once(WB_PATH.'/framework/functions-utf8.php');
394 363
	}
395
	$use_iconv = true;
396
	$use_mbstring = true;
397
	/*
398
	if(version_compare(PHP_VERSION, "5.1.0", "<")) {
399
		$use_mbstring = false; // don't rely on mb_convert_encoding if php<5.1.0
400
		$use_iconv = false; // don't rely on iconv neither
401
	}
402
	*/
403
	
404
	// try mb_convert_encoding(). This can handle to or from HTML-ENTITIES, too
405
	if ($use_mbstring && function_exists('mb_convert_encoding')) {
406
		// there's no GB2312 or ISO-8859-11 encoding in php's mb_* functions
407
		if ($charset_in=='ISO-8859-11' || $charset_in=='GB2312') {
408
			if ($use_iconv && function_exists('iconv')) {
409
				$string = iconv($charset_in, 'UTF-8', $string);
410
			}
411
			else {
412
				if ($charset_in == 'GB2312') {
413
					$string=my_mysql_iconv($string, 'gb2312', 'utf8');
414
				} else {
415
					$string=my_mysql_iconv($string, 'tis620', 'utf8');
416
				}
417
			}
418
			$charset_in='UTF-8';
419
			if ($charset_out == 'UTF-8') {
420
				return $string;
421
			}
422
		}
423
		if ($charset_out=='ISO-8859-11' || $charset_out=='GB2312') {
424
			$string=mb_convert_encoding($string, 'UTF-8', $charset_in);
425
			if ($use_iconv && function_exists('iconv')) {
426
				$string = iconv('UTF-8', $charset_out, $string);
427
			}
428
			else {
429
				if ($charset_out == 'GB2312') {
430
					$string=my_mysql_iconv($string, 'utf8', 'gb2312');
431
				} else {
432
					$string=my_mysql_iconv($string, 'utf8', 'tis620');
433
				}
434
			}
435
		} else {
436
			$string = strtr($string, array("&lt;"=>"&_lt;", "&gt;"=>"&_gt;", "&amp;"=>"&_amp;", "&quot;"=>"&_quot;", "&#39;"=>"&_#39;"));
437
			$string=mb_convert_encoding($string, $charset_out, $charset_in);
438
			$string = strtr($string, array("&_lt;"=>"&lt;", "&_gt;"=>"&gt;", "&_amp;"=>"&amp;", "&_quot;"=>"&quot;", "&_#39;"=>"&#39;"));
439
		}
440
		return $string;
441
	}
442

  
443
	// try iconv(). This can't handle to or from HTML-ENTITIES.
444
	if ($use_iconv && function_exists('iconv') && $charset_out!='HTML-ENTITIES' && $charset_in!='HTML-ENTITIES' ) {
445
		$string = iconv($charset_in, $charset_out, $string);
446
		return $string;
447
	}
448

  
449
	// do the UTF-8->HTML-ENTITIES or HTML-ENTITIES->UTF-8 translation if mb_convert_encoding isn't available
450
	if (($charset_in=='HTML-ENTITIES' && $charset_out=='UTF-8') || ($charset_in=='UTF-8' && $charset_out=='HTML-ENTITIES')) {
451
		$string = string_decode_encode_entities($string, $charset_out, $charset_in);
452
		return $string;
453
	}
454

  
455
	// mb_convert_encoding() and iconv() aren't available, so use my_mysql_iconv()
456
	if ($charset_in == 'ISO-8859-1') { $mysqlcharset_from = 'latin1'; }
457
	elseif ($charset_in == 'ISO-8859-2') { $mysqlcharset_from = 'latin2'; }
458
	elseif ($charset_in == 'ISO-8859-3') { $mysqlcharset_from = 'latin1'; }
459
	elseif ($charset_in == 'ISO-8859-4') { $mysqlcharset_from = 'latin7'; }
460
	elseif ($charset_in == 'ISO-8859-5') { $string = convert_cyr_string ($string, "iso8859-5", "windows-1251" ); $mysqlcharset_from = 'cp1251'; }
461
	elseif ($charset_in == 'ISO-8859-6') { $mysqlcharset_from = ''; } //?
462
	elseif ($charset_in == 'ISO-8859-7') { $mysqlcharset_from = 'greek'; }
463
	elseif ($charset_in == 'ISO-8859-8') { $mysqlcharset_from = 'hebrew'; }
464
	elseif ($charset_in == 'ISO-8859-9') { $mysqlcharset_from = 'latin5'; }
465
	elseif ($charset_in == 'ISO-8859-10') { $mysqlcharset_from = 'latin1'; }
466
	elseif ($charset_in == 'BIG5') { $mysqlcharset_from = 'big5'; }
467
	elseif ($charset_in == 'ISO-2022-JP') { $mysqlcharset_from = ''; } //?
468
	elseif ($charset_in == 'ISO-2022-KR') { $mysqlcharset_from = ''; } //?
469
	elseif ($charset_in == 'GB2312') { $mysqlcharset_from = 'gb2312'; }
470
	elseif ($charset_in == 'ISO-8859-11') { $mysqlcharset_from = 'tis620'; }
471
	elseif ($charset_in == 'UTF-8') { $mysqlcharset_from = 'utf8'; }
472
	else { $mysqlcharset_from = 'latin1'; }
473

  
474
	if ($charset_out == 'ISO-8859-1') { $mysqlcharset_to = 'latin1'; }
475
	elseif ($charset_out == 'ISO-8859-2') { $mysqlcharset_to = 'latin2'; }
476
	elseif ($charset_out == 'ISO-8859-3') { $mysqlcharset_to = 'latin1'; }
477
	elseif ($charset_out == 'ISO-8859-4') { $mysqlcharset_to = 'latin7'; }
478
	elseif ($charset_out == 'ISO-8859-5') { $mysqlcharset_to = 'cp1251'; } // use convert_cyr_string afterwards
479
	elseif ($charset_out == 'ISO-8859-6') { $mysqlcharset_to = ''; } //?
480
	elseif ($charset_out == 'ISO-8859-7') { $mysqlcharset_to = 'greek'; }
481
	elseif ($charset_out == 'ISO-8859-8') { $mysqlcharset_to = 'hebrew'; }
482
	elseif ($charset_out == 'ISO-8859-9') { $mysqlcharset_to = 'latin5'; }
483
	elseif ($charset_out == 'ISO-8859-10') { $mysqlcharset_to = 'latin1'; }
484
	elseif ($charset_out == 'BIG5') { $mysqlcharset_to = 'big5'; }
485
	elseif ($charset_out == 'ISO-2022-JP') { $mysqlcharset_to = ''; } //?
486
	elseif ($charset_out == 'ISO-2022-KR') { $mysqlcharset_to = ''; } //?
487
	elseif ($charset_out == 'GB2312') { $mysqlcharset_to = 'gb2312'; }
488
	elseif ($charset_out == 'ISO-8859-11') { $mysqlcharset_to = 'tis620'; }
489
	elseif ($charset_out == 'UTF-8') { $mysqlcharset_to = 'utf8'; }
490
	else { $mysqlcharset_to = 'latin1'; }
491

  
492
	if ($mysqlcharset_from!="" && $mysqlcharset_to!="" && $mysqlcharset_from!=$mysqlcharset_to) {
493
		$string=my_mysql_iconv($string, $mysqlcharset_from, $mysqlcharset_to);
494
		if ($mysqlcharset_to == 'cp1251') { 
495
			$string = convert_cyr_string ($string, "windows-1251", "iso-8859-5" );
496
		}
497
		return($string);
498
	}
499

  
500
	// $string is unchanged. This will happen if we have to deal with ISO-8859-6 or ISO-2022-JP or -KR
501
	// and mbstring _and_ iconv aren't available.
502
	return $string;
503 364
}
504 365

  
505
// Decodes or encodes html-entities. Works for utf-8 only!
506
function string_decode_encode_entities($string, $out='HTML-ENTITIES', $in='UTF-8') {
507
	if(!(($in=='UTF-8' || $in=='HTML-ENTITIES') && ($out=='UTF-8' || $out=='HTML-ENTITIES'))) {
508
		return $string;
509
	}
510
	$named_to_numbered_entities=array(
511
		'&Aacute;'=>'&#193;','&aacute;'=>'&#225;',
512
		'&Acirc;'=>'&#194;','&acirc;'=>'&#226;','&acute;'=>'&#180;','&AElig;'=>'&#198;','&aelig;'=>'&#230;',
513
		'&Agrave;'=>'&#192;','&agrave;'=>'&#224;','&alefsym;'=>'&#8501;','&Alpha;'=>'&#913;','&alpha;'=>'&#945;',
514
		'&and;'=>'&#8743;','&ang;'=>'&#8736;','&apos;'=>'&#39;','&Aring;'=>'&#197;','&aring;'=>'&#229;',
515
		'&asymp;'=>'&#8776;','&Atilde;'=>'&#195;','&atilde;'=>'&#227;','&Auml;'=>'&#196;','&auml;'=>'&#228;',
516
		'&bdquo;'=>'&#8222;','&Beta;'=>'&#914;','&beta;'=>'&#946;','&brvbar;'=>'&#166;','&bull;'=>'&#8226;',
517
		'&cap;'=>'&#8745;','&Ccedil;'=>'&#199;','&ccedil;'=>'&#231;','&cedil;'=>'&#184;','&cent;'=>'&#162;',
518
		'&Chi;'=>'&#935;','&chi;'=>'&#967;','&circ;'=>'&#710;','&clubs;'=>'&#9827;','&cong;'=>'&#8773;',
519
		'&copy;'=>'&#169;','&crarr;'=>'&#8629;','&cup;'=>'&#8746;','&curren;'=>'&#164;','&Dagger;'=>'&#8225;',
520
		'&dagger;'=>'&#8224;','&dArr;'=>'&#8659;','&darr;'=>'&#8595;','&deg;'=>'&#176;','&Delta;'=>'&#916;',
521
		'&delta;'=>'&#948;','&diams;'=>'&v#9830;','&divide;'=>'&#247;','&Eacute;'=>'&#201;','&eacute;'=>'&#233;',
522
		'&Ecirc;'=>'&#202;','&ecirc;'=>'&#234;','&Egrave;'=>'&#200;','&egrave;'=>'&#232;','&empty;'=>'&#8709;',
523
		'&emsp;'=>'&#8195;','&ensp;'=>'&#8194;','&Epsilon;'=>'&#917;','&epsilon;'=>'&#949;','&equiv;'=>'&#8801;',
524
		'&Eta;'=>'&#919;','&eta;'=>'&#951;','&ETH;'=>'&#208;','&eth;'=>'&#240;','&Euml;'=>'&#203;','&euml;'=>'&#235;',
525
		'&euro;'=>'&#8364;','&exist;'=>'&#8707;','&fnof;'=>'&#402;','&forall;'=>'&#8704;','&frac12;'=>'&#189;',
526
		'&frac14;'=>'&#188;','&frac34;'=>'&#190;','&frasl;'=>'&#8260;','&Gamma;'=>'&#915;','&gamma;'=>'&#947;',
527
		'&ge;'=>'&#8805;','&hArr;'=>'&#8660;','&harr;'=>'&#8596;','&hearts;'=>'&#9829;',
528
		'&hellip;'=>'&#8230;','&Iacute;'=>'&#205;','&iacute;'=>'&#237;','&Icirc;'=>'&#206;','&icirc;'=>'&#238;',
529
		'&iexcl;'=>'&#161;','&Igrave;'=>'&#204;','&igrave;'=>'&#236;','&image;'=>'&#8465;','&infin;'=>'&#8734;',
530
		'&int;'=>'&#8747;','&Iota;'=>'&#921;','&iota;'=>'&#953;','&iquest;'=>'&#191;','&isin;'=>'&#8712;',
531
		'&Iuml;'=>'&#207;','&iuml;'=>'&#239;','&Kappa;'=>'&#922;','&kappa;'=>'&#954;','&Lambda;'=>'&#923;',
532
		'&lambda;'=>'&#955;','&lang;'=>'&#9001;','&laquo;'=>'&#171;','&lArr;'=>'&#8656;','&larr;'=>'&#8592;',
533
		'&lceil;'=>'&#8968;','&ldquo;'=>'&#8220;','&le;'=>'&#8804;','&lfloor;'=>'&#8970;','&lowast;'=>'&#8727;',
534
		'&loz;'=>'&#9674;','&lrm;'=>'&#8206;','&lsaquo;'=>'&#8249;','&lsquo;'=>'&#8216;',
535
		'&macr;'=>'&#175;','&mdash;'=>'&#8212;','&micro;'=>'&#181;','&middot;'=>'&#183;','&minus;'=>'&#8722;',
536
		'&Mu;'=>'&#924;','&mu;'=>'&#956;','&nabla;'=>'&#8711;','&nbsp;'=>'&#160;','&ndash;'=>'&#8211;',
537
		'&ne;'=>'&#8800;','&ni;'=>'&#8715;','&not;'=>'&#172;','&notin;'=>'&#8713;','&nsub;'=>'&#8836;',
538
		'&Ntilde;'=>'&#209;','&ntilde;'=>'&#241;','&Nu;'=>'&#925;','&nu;'=>'&#957;','&Oacute;'=>'&#211;',
539
		'&oacute;'=>'&#243;','&Ocirc;'=>'&#212;','&ocirc;'=>'&#244;','&OElig;'=>'&#338;','&oelig;'=>'&#339;',
540
		'&Ograve;'=>'&#210;','&ograve;'=>'&#242;','&oline;'=>'&#8254;','&Omega;'=>'&#937;','&omega;'=>'&#969;',
541
		'&Omicron;'=>'&#927;','&omicron;'=>'&#959;','&oplus;'=>'&#8853;','&or;'=>'&#8744;','&ordf;'=>'&#170;',
542
		'&ordm;'=>'&#186;','&Oslash;'=>'&#216;','&oslash;'=>'&#248;','&Otilde;'=>'&#213;','&otilde;'=>'&#245;',
543
		'&otimes;'=>'&#8855;','&Ouml;'=>'&#214;','&ouml;'=>'&#246;','&para;'=>'&#182;','&part;'=>'&#8706;',
544
		'&permil;'=>'&#8240;','&perp;'=>'&#8869;','&Phi;'=>'&#934;','&phi;'=>'&#966;','&Pi;'=>'&#928;',
545
		'&pi;'=>'&#960;','&piv;'=>'&#982;','&plusmn;'=>'&#177;','&pound;'=>'&#163;','&Prime;'=>'&#8243;',
546
		'&prime;'=>'&#8242;','&prod;'=>'&#8719;','&prop;'=>'&#8733;','&Psi;'=>'&#936;','&psi;'=>'&#968;',
547
		'&quot;'=>'&#34;','&radic;'=>'&#8730;','&rang;'=>'&#9002;','&raquo;'=>'&#187;','&rArr;'=>'&#8658;',
548
		'&rarr;'=>'&#8594;','&rceil;'=>'&#8969;','&rdquo;'=>'&#8221;','&real;'=>'&#8476;','&reg;'=>'&#174;',
549
		'&rfloor;'=>'&#8971;','&Rho;'=>'&#929;','&rho;'=>'&#961;','&rlm;'=>'&#8207;','&rsaquo;'=>'&#8250;',
550
		'&rsquo;'=>'&#8217;','&sbquo;'=>'&#8218;','&Scaron;'=>'&#352;','&scaron;'=>'&#353;','&sdot;'=>'&#8901;',
551
		'&sect;'=>'&#167;','&shy;'=>'&#173;','&Sigma;'=>'&#931;','&sigma;'=>'&#963;','&sigmaf;'=>'&#962;',
552
		'&sim;'=>'&#8764;','&spades;'=>'&#9824;','&sub;'=>'&#8834;','&sube;'=>'&#8838;','&sum;'=>'&#8721;',
553
		'&sup;'=>'&#8835;','&sup1;'=>'&#185;','&sup2;'=>'&#178;','&sup3;'=>'&#179;','&supe;'=>'&#8839;',
554
		'&szlig;'=>'&#223;','&Tau;'=>'&#932;','&tau;'=>'&#964;','&there4;'=>'&#8756;','&Theta;'=>'&#920;',
555
		'&theta;'=>'&#952;','&thetasym;'=>'&#977;','&thinsp;'=>'&#8201;','&THORN;'=>'&#222;','&thorn;'=>'&#254;',
556
		'&tilde;'=>'&#732;','&times;'=>'&#215;','&trade;'=>'&#8482;','&Uacute;'=>'&#218;','&uacute;'=>'&#250;',
557
		'&uArr;'=>'&#8657;','&uarr;'=>'&#8593;','&Ucirc;'=>'&#219;','&ucirc;'=>'&#251;','&Ugrave;'=>'&#217;',
558
		'&ugrave;'=>'&#249;','&uml;'=>'&#168;','&upsih;'=>'&#978;','&Upsilon;'=>'&#933;','&upsilon;'=>'&#965;',
559
		'&Uuml;'=>'&#220;','&uuml;'=>'&#252;','&weierp;'=>'&#8472;','&Xi;'=>'&#926;','&xi;'=>'&#958;',
560
		'&Yacute;'=>'&#221;','&yacute;'=>'&#253;','&yen;'=>'&#165;','&Yuml;'=>'&#376;','&yuml;'=>'&#255;',
561
		'&Zeta;'=>'&#918;','&zeta;'=>'&#950;','&zwj;'=>'&#8205;','&zwnj;'=>'&#8204;'
562
	);
563
	$numbered_to_named_entities=array(
564
		'&#193;'=>'&Aacute;','&#225;'=>'&aacute;','&#194;'=>'&Acirc;','&#226;'=>'&acirc;','&#180;'=>'&acute;',
565
		'&#198;'=>'&AElig;','&#230;'=>'&aelig;','&#192;'=>'&Agrave;','&#224;'=>'&agrave;','&#8501;'=>'&alefsym;',
566
		'&#913;'=>'&Alpha;','&#945;'=>'&alpha;','&#8743;'=>'&and;','&#8736;'=>'&ang;',
567
		'&#39;'=>'&apos;','&#197;'=>'&Aring;','&#229;'=>'&aring;','&#8776;'=>'&asymp;','&#195;'=>'&Atilde;',
568
		'&#227;'=>'&atilde;','&#196;'=>'&Auml;','&#228;'=>'&auml;','&#8222;'=>'&bdquo;','&#914;'=>'&Beta;',
569
		'&#946;'=>'&beta;','&#166;'=>'&brvbar;','&#8226;'=>'&bull;','&#8745;'=>'&cap;','&#199;'=>'&Ccedil;',
570
		'&#231;'=>'&ccedil;','&#184;'=>'&cedil;','&#162;'=>'&cent;','&#935;'=>'&Chi;','&#967;'=>'&chi;',
571
		'&#710;'=>'&circ;','&#9827;'=>'&clubs;','&#8773;'=>'&cong;','&#169;'=>'&copy;','&#8629;'=>'&crarr;',
572
		'&#8746;'=>'&cup;','&#164;'=>'&curren;','&#8225;'=>'&Dagger;','&#8224;'=>'&dagger;','&#8659;'=>'&dArr;',
573
		'&#8595;'=>'&darr;','&#176;'=>'&deg;','&#916;'=>'&Delta;','&#948;'=>'&delta;','&v#9830;'=>'&diams;',
574
		'&#247;'=>'&divide;','&#201;'=>'&Eacute;','&#233;'=>'&eacute;','&#202;'=>'&Ecirc;','&#234;'=>'&ecirc;',
575
		'&#200;'=>'&Egrave;','&#232;'=>'&egrave;','&#8709;'=>'&empty;','&#8195;'=>'&emsp;','&#8194;'=>'&ensp;',
576
		'&#917;'=>'&Epsilon;','&#949;'=>'&epsilon;','&#8801;'=>'&equiv;','&#919;'=>'&Eta;','&#951;'=>'&eta;',
577
		'&#208;'=>'&ETH;','&#240;'=>'&eth;','&#203;'=>'&Euml;','&#235;'=>'&euml;','&#8364;'=>'&euro;',
578
		'&#8707;'=>'&exist;','&#402;'=>'&fnof;','&#8704;'=>'&forall;','&#189;'=>'&frac12;','&#188;'=>'&frac14;',
579
		'&#190;'=>'&frac34;','&#8260;'=>'&frasl;','&#915;'=>'&Gamma;','&#947;'=>'&gamma;','&#8805;'=>'&ge;',
580
		'&#8660;'=>'&hArr;','&#8596;'=>'&harr;','&#9829;'=>'&hearts;','&#8230;'=>'&hellip;',
581
		'&#205;'=>'&Iacute;','&#237;'=>'&iacute;','&#206;'=>'&Icirc;','&#238;'=>'&icirc;','&#161;'=>'&iexcl;',
582
		'&#204;'=>'&Igrave;','&#236;'=>'&igrave;','&#8465;'=>'&image;','&#8734;'=>'&infin;','&#8747;'=>'&int;',
583
		'&#921;'=>'&Iota;','&#953;'=>'&iota;','&#191;'=>'&iquest;','&#8712;'=>'&isin;','&#207;'=>'&Iuml;',
584
		'&#239;'=>'&iuml;','&#922;'=>'&Kappa;','&#954;'=>'&kappa;','&#923;'=>'&Lambda;','&#955;'=>'&lambda;',
585
		'&#9001;'=>'&lang;','&#171;'=>'&laquo;','&#8656;'=>'&lArr;','&#8592;'=>'&larr;','&#8968;'=>'&lceil;',
586
		'&#8220;'=>'&ldquo;','&#8804;'=>'&le;','&#8970;'=>'&lfloor;','&#8727;'=>'&lowast;','&#9674;'=>'&loz;',
587
		'&#8206;'=>'&lrm;','&#8249;'=>'&lsaquo;','&#8216;'=>'&lsquo;','&#175;'=>'&macr;',
588
		'&#8212;'=>'&mdash;','&#181;'=>'&micro;','&#183;'=>'&middot;','&#8722;'=>'&minus;','&#924;'=>'&Mu;',
589
		'&#956;'=>'&mu;','&#8711;'=>'&nabla;','&#160;'=>'&nbsp;','&#8211;'=>'&ndash;','&#8800;'=>'&ne;',
590
		'&#8715;'=>'&ni;','&#172;'=>'&not;','&#8713;'=>'&notin;','&#8836;'=>'&nsub;','&#209;'=>'&Ntilde;',
591
		'&#241;'=>'&ntilde;','&#925;'=>'&Nu;','&#957;'=>'&nu;','&#211;'=>'&Oacute;','&#243;'=>'&oacute;',
592
		'&#212;'=>'&Ocirc;','&#244;'=>'&ocirc;','&#338;'=>'&OElig;','&#339;'=>'&oelig;','&#210;'=>'&Ograve;',
593
		'&#242;'=>'&ograve;','&#8254;'=>'&oline;','&#937;'=>'&Omega;','&#969;'=>'&omega;','&#927;'=>'&Omicron;',
594
		'&#959;'=>'&omicron;','&#8853;'=>'&oplus;','&#8744;'=>'&or;','&#170;'=>'&ordf;','&#186;'=>'&ordm;',
595
		'&#216;'=>'&Oslash;','&#248;'=>'&oslash;','&#213;'=>'&Otilde;','&#245;'=>'&otilde;','&#8855;'=>'&otimes;',
596
		'&#214;'=>'&Ouml;','&#246;'=>'&ouml;','&#182;'=>'&para;','&#8706;'=>'&part;','&#8240;'=>'&permil;',
597
		'&#8869;'=>'&perp;','&#934;'=>'&Phi;','&#966;'=>'&phi;','&#928;'=>'&Pi;','&#960;'=>'&pi;','&#982;'=>'&piv;',
598
		'&#177;'=>'&plusmn;','&#163;'=>'&pound;','&#8243;'=>'&Prime;','&#8242;'=>'&prime;','&#8719;'=>'&prod;',
599
		'&#8733;'=>'&prop;','&#936;'=>'&Psi;','&#968;'=>'&psi;','&#34;'=>'&quot;','&#8730;'=>'&radic;',
600
		'&#9002;'=>'&rang;','&#187;'=>'&raquo;','&#8658;'=>'&rArr;','&#8594;'=>'&rarr;','&#8969;'=>'&rceil;',
601
		'&#8221;'=>'&rdquo;','&#8476;'=>'&real;','&#174;'=>'&reg;','&#8971;'=>'&rfloor;','&#929;'=>'&Rho;',
602
		'&#961;'=>'&rho;','&#8207;'=>'&rlm;','&#8250;'=>'&rsaquo;','&#8217;'=>'&rsquo;','&#8218;'=>'&sbquo;',
603
		'&#352;'=>'&Scaron;','&#353;'=>'&scaron;','&#8901;'=>'&sdot;','&#167;'=>'&sect;','&#173;'=>'&shy;',
604
		'&#931;'=>'&Sigma;','&#963;'=>'&sigma;','&#962;'=>'&sigmaf;','&#8764;'=>'&sim;','&#9824;'=>'&spades;',
605
		'&#8834;'=>'&sub;','&#8838;'=>'&sube;','&#8721;'=>'&sum;','&#8835;'=>'&sup;','&#185;'=>'&sup1;',
606
		'&#178;'=>'&sup2;','&#179;'=>'&sup3;','&#8839;'=>'&supe;','&#223;'=>'&szlig;','&#932;'=>'&Tau;',
607
		'&#964;'=>'&tau;','&#8756;'=>'&there4;','&#920;'=>'&Theta;','&#952;'=>'&theta;','&#977;'=>'&thetasym;',
608
		'&#8201;'=>'&thinsp;','&#222;'=>'&THORN;','&#254;'=>'&thorn;','&#732;'=>'&tilde;','&#215;'=>'&times;',
609
		'&#8482;'=>'&trade;','&#218;'=>'&Uacute;','&#250;'=>'&uacute;','&#8657;'=>'&uArr;','&#8593;'=>'&uarr;',
610
		'&#219;'=>'&Ucirc;','&#251;'=>'&ucirc;','&#217;'=>'&Ugrave;','&#249;'=>'&ugrave;','&#168;'=>'&uml;',
611
		'&#978;'=>'&upsih;','&#933;'=>'&Upsilon;','&#965;'=>'&upsilon;','&#220;'=>'&Uuml;','&#252;'=>'&uuml;',
612
		'&#8472;'=>'&weierp;','&#926;'=>'&Xi;','&#958;'=>'&xi;','&#221;'=>'&Yacute;','&#253;'=>'&yacute;',
613
		'&#165;'=>'&yen;','&#376;'=>'&Yuml;','&#255;'=>'&yuml;','&#918;'=>'&Zeta;','&#950;'=>'&zeta;','&#8205;'=>'&zwj;',
614
		'&#8204;'=>'&zwnj;'
615
	);
616
		
617
	if ($in == 'HTML-ENTITIES') {
618
		$string = strtr($string, $named_to_numbered_entities);
619
		$string = preg_replace("/&#([0-9]+);/e", "code_to_utf8($1)", $string);
620
	}
621
	elseif ($out == 'HTML-ENTITIES') {
622
		$char = "";
623
		$i=0;
624
		$len=strlen($string);
625
		if($len==0) return $string;
626
		do {
627
			if(ord($string{$i}) <= 127) $ud = $string{$i++};
628
			elseif(ord($string{$i}) <= 223) $ud = (ord($string{$i++})-192)*64 + (ord($string{$i++})-128);
629
			elseif(ord($string{$i}) <= 239) $ud = (ord($string{$i++})-224)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128);
630
			elseif(ord($string{$i}) <= 247) $ud = (ord($string{$i++})-240)*262144 + (ord($string{$i++})-128)*4096 + (ord($string{$i++})-128)*64 + (ord($string{$i++})-128);
631
			elseif(ord($string{$i}) <= 251) $ud = ord($string{$i++}); // error!
632
			if($ud > 127) {
633
				$char .= "&#$ud;";
634
			} else {
635
				$char .= $ud;
636
			}
637
		} while($i < $len);
638
		$string = $char;
639
		$string = strtr($string, $numbered_to_named_entities);
640
		// do ' and "
641
		$string = strtr($string, array('\''=>'&#39;', '\"'=>'&quot;'));
642
	}
643
	return $string;
366
// Convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts
367
// Will replace all numeric and named entities except &gt; &lt; &apos; &quot; &#39; &nbsp;
368
// In case of error the returned string is unchanged, and a message is emitted.
369
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET) {
370
	//init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) to be loaded more than once
371
	init_utf8funcs();
372
	return entities_to_umlauts2($string, $charset_out);
644 373
}
645 374

  
646
// support-function for string_decode_encode_entities()
647
function code_to_utf8($num) {
648
	if ($num <= 0x7F) {
649
		return chr($num);
650
	} elseif ($num <= 0x7FF) {
651
		return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
652
	} elseif ($num <= 0xFFFF) {
653
		 return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
654
	} elseif ($num <= 0x1FFFFF) {
655
		return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
656
	}
657
	return " ";
658
}
659

  
660
// Function to convert a string from mixed html-entities/umlauts to pure utf-8-umlauts
661
function string_to_utf8($string, $charset=DEFAULT_CHARSET) {
662
	$charset = strtoupper($charset);
663
	if ($charset == '') { $charset = 'ISO-8859-1'; }
664

  
665
	if (!is_UTF8($string)) {
666
		$string=mb_convert_encoding_wrapper($string, 'UTF-8', $charset);
667
	}
668
	// check if we really get UTF-8. We don't get UTF-8 if charset is ISO-8859-6 or ISO-2022-JP/KR
669
	// and mb_string AND iconv aren't available.
670
	if (is_UTF8($string)) {
671
		$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
672
		$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
673
	} else {
674
		// nothing we can do here :-(
675
	}
676
	return($string);
677
}
678

  
679
// function to check if a string is UTF-8
680
function is_UTF8 ($str) {
681
	if (strlen($str) < 4000) {
682
		// see http://bugs.php.net/bug.php?id=24460 and http://bugs.php.net/bug.php?id=27070 and http://ilia.ws/archives/5-Top-10-ways-to-crash-PHP.html for this.
683
		// 4000 works for me ...
684
		return preg_match('/^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/s', $str);
685
	}	else {
686
		$isUTF8 = true;
687
		while($str{0}) {
688
			if (preg_match("/^[\x09\x0A\x0D\x20-\x7E]/", $str)) { $str = substr($str, 1); continue; }
689
			if (preg_match("/^[\xC2-\xDF][\x80-\xBF]/", $str)) { $str = substr($str, 2); continue; }
690
			if (preg_match("/^\xE0[\xA0-\xBF][\x80-\xBF]/", $str)) { $str = substr($str, 3); continue; }
691
			if (preg_match("/^[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}/", $str)) { $str = substr($str, 3); continue; }
692
			if (preg_match("/^\xED[\x80-\x9F][\x80-\xBF]/", $str)) { $str = substr($str, 3); continue; }
693
			if (preg_match("/^\xF0[\x90-\xBF][\x80-\xBF]{2}/", $str)) { $str = substr($str, 4); continue; }
694
			if (preg_match("/^[\xF1-\xF3][\x80-\xBF]{3}/", $str)) { $str = substr($str, 4); continue; }
695
			if (preg_match("/^\xF4[\x80-\x8F][\x80-\xBF]{2}/", $str)) { $str = substr($str, 4); continue; }
696
			if (preg_match("/^$/", $str)) { break; }
697
			$isUTF8 = false;
698
			break;
699
		}
700
		return ($isUTF8);
701
	}
702
}
703

  
704
// Function to convert a string from mixed html-entities/umlauts to pure $charset_out-umlauts
705
function entities_to_umlauts($string, $charset_out=DEFAULT_CHARSET) {
706
	$charset_out = strtoupper($charset_out);
707
	if ($charset_out == '') { $charset_out = 'ISO-8859-1'; }
708
	$charset_in = strtoupper(DEFAULT_CHARSET);
709
	require_once(WB_PATH.'/framework/charsets_table.php');
710
	global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8;
711
	global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11;
712

  
713
	// string to utf-8, entities_to_utf8
714
	if (substr($charset_in,0,8) == 'ISO-8859' || $charset_in == 'UTF-8') {
715
		if ($charset_in == 'ISO-8859-1') {
716
			$string=utf8_encode($string);
717
		} elseif ($charset_in == 'ISO-8859-2') {
718
			$string = strtr($string, $iso_8859_2_to_utf8);
719
		} elseif ($charset_in == 'ISO-8859-3') {
720
			$string = strtr($string, $iso_8859_3_to_utf8);
721
		} elseif ($charset_in == 'ISO-8859-4') {
722
			$string = strtr($string, $iso_8859_4_to_utf8);
723
		} elseif ($charset_in == 'ISO-8859-5') {
724
			$string = strtr($string, $iso_8859_5_to_utf8);
725
		} elseif ($charset_in == 'ISO-8859-6') {
726
			$string = strtr($string, $iso_8859_6_to_utf8);
727
		} elseif ($charset_in == 'ISO-8859-7') {
728
			$string = strtr($string, $iso_8859_7_to_utf8);
729
		} elseif ($charset_in == 'ISO-8859-8') {
730
			$string = strtr($string, $iso_8859_8_to_utf8);
731
		} elseif ($charset_in == 'ISO-8859-9') {
732
			$string = strtr($string, $iso_8859_9_to_utf8);
733
		} elseif ($charset_in == 'ISO-8859-10') {
734
			$string = strtr($string, $iso_8859_10_to_utf8);
735
		} elseif ($charset_in == 'ISO-8859-11') {
736
			$string = strtr($string, $iso_8859_11_to_utf8);
737
		}
738
		// decode html-entities
739
		if(preg_match("/&[#a-zA-Z0-9]+;/", $string)) {
740
			$string=string_decode_encode_entities($string, 'UTF-8', 'HTML-ENTITIES');
741
			//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8'); // alternative to string_decode_encode_entities()
742
			//$string=mb_convert_encoding_wrapper($string, 'UTF-8', 'HTML-ENTITIES');
743
		}
744
	}
745
	else {
746
		$string = string_to_utf8($string); // will decode html-entities, too.
747
	}
748
	// string to $charset_out
749
	if($charset_out == 'ISO-8859-1') {
750
			$string=utf8_decode($string);
751
	} elseif($charset_out == 'ISO-8859-2') {
752
		$string = strtr($string, $utf8_to_iso_8859_2);
753
	} elseif($charset_out == 'ISO-8859-3') {
754
		$string = strtr($string, $utf8_to_iso_8859_3);
755
	} elseif($charset_out == 'ISO-8859-4') {
756
		$string = strtr($string, $utf8_to_iso_8859_4);
757
	} elseif($charset_out == 'ISO-8859-5') {
758
		$string = strtr($string, $utf8_to_iso_8859_5);
759
	} elseif($charset_out == 'ISO-8859-6') {
760
		$string = strtr($string, $utf8_to_iso_8859_6);
761
	} elseif($charset_out == 'ISO-8859-7') {
762
		$string = strtr($string, $utf8_to_iso_8859_7);
763
	} elseif($charset_out == 'ISO-8859-8') {
764
		$string = strtr($string, $utf8_to_iso_8859_8);
765
	} elseif($charset_out == 'ISO-8859-9') {
766
		$string = strtr($string, $utf8_to_iso_8859_9);
767
	} elseif($charset_out == 'ISO-8859-10') {
768
		$string = strtr($string, $utf8_to_iso_8859_10);
769
	} elseif($charset_out == 'ISO-8859-11') {
770
		$string = strtr($string, $utf8_to_iso_8859_11);
771
	} elseif($charset_out != 'UTF-8') {
772
		if(is_UTF8($string)) {
773
			$string=mb_convert_encoding_wrapper($string, $charset_out, 'UTF-8');
774
		}
775
	}
776
	return $string;
777
}	
778

  
779
// Function to convert a string from mixed html-entities/$charset_in-umlauts to pure html-entities
375
// Will convert a string in $charset_in encoding to a pure ASCII string with HTML-entities.
376
// In case of error the returned string is unchanged, and a message is emitted.
780 377
function umlauts_to_entities($string, $charset_in=DEFAULT_CHARSET) {
781
	$charset_in = strtoupper($charset_in);
782
	if ($charset_in == "") { $charset_in = 'ISO-8859-1'; }
783
	require_once(WB_PATH.'/framework/charsets_table.php');
784
	global $iso_8859_2_to_utf8, $iso_8859_3_to_utf8, $iso_8859_4_to_utf8, $iso_8859_5_to_utf8, $iso_8859_6_to_utf8, $iso_8859_7_to_utf8, $iso_8859_8_to_utf8, $iso_8859_9_to_utf8, $iso_8859_10_to_utf8, $iso_8859_11_to_utf8;
785

  
786
	// string to utf-8, umlauts_to_entities
787
	if ($charset_in == 'UTF-8' || substr($charset_in,0,8) == 'ISO-8859') {
788
		if ($charset_in == 'ISO-8859-1') {
789
			$string=utf8_encode($string);
790
		} elseif ($charset_in == 'ISO-8859-2') {
791
			$string = strtr($string, $iso_8859_2_to_utf8);
792
		} elseif ($charset_in == 'ISO-8859-3') {
793
			$string = strtr($string, $iso_8859_3_to_utf8);
794
		} elseif ($charset_in == 'ISO-8859-4') {
795
			$string = strtr($string, $iso_8859_4_to_utf8);
796
		} elseif ($charset_in == 'ISO-8859-5') {
797
			$string = strtr($string, $iso_8859_5_to_utf8);
798
		} elseif ($charset_in == 'ISO-8859-6') {
799
			$string = strtr($string, $iso_8859_6_to_utf8);
800
		} elseif ($charset_in == 'ISO-8859-7') {
801
			$string = strtr($string, $iso_8859_7_to_utf8);
802
		} elseif ($charset_in == 'ISO-8859-8') {
803
			$string = strtr($string, $iso_8859_8_to_utf8);
804
		} elseif ($charset_in == 'ISO-8859-9') {
805
			$string = strtr($string, $iso_8859_9_to_utf8);
806
		} elseif ($charset_in == 'ISO-8859-10') {
807
			$string = strtr($string, $iso_8859_10_to_utf8);
808
		} elseif ($charset_in == 'ISO-8859-11') {
809
			$string = strtr($string, $iso_8859_11_to_utf8);
810
		}
811
		// encode html-entities
812
		$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8');
813
		//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
814
	}
815
	else {
816
		$string = string_to_utf8($string, $charset_in);
817
		// encode html-entities
818
		if (is_UTF8($string)) {
819
			$string=string_decode_encode_entities($string, 'HTML-ENTITIES', 'UTF-8');
820
			//$string=mb_convert_encoding_wrapper($string, 'HTML-ENTITIES', 'UTF-8');
821
		}
822
	}
823
	return $string;
378
	//init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) to be loaded more than once
379
	init_utf8funcs();
380
	return umlauts_to_entities2($string, $charset_in);
824 381
}
825 382

  
826
function umlauts_to_defcharset($string, $charset) {
827
		$charset_out = strtoupper(DEFAULT_CHARSET);
828
		if ($charset_out == "") { $charset_out = 'ISO-8859-1'; }
829
		require_once(WB_PATH.'/framework/charsets_table.php');
830
		global $utf8_to_iso_8859_2, $utf8_to_iso_8859_3, $utf8_to_iso_8859_4, $utf8_to_iso_8859_5, $utf8_to_iso_8859_6, $utf8_to_iso_8859_7, $utf8_to_iso_8859_8, $utf8_to_iso_8859_9, $utf8_to_iso_8859_10, $utf8_to_iso_8859_11;
831
		
832
		if($charset_out == $charset) {
833
			return $string;
834
		}
835

  
836
		if($charset == 'UTF-8') {
837
			if($charset_out == 'ISO-8859-1') {
838
				$string = utf8_decode($string);
839
			} elseif ($charset_out == 'ISO-8859-2') {
840
				$string = strtr($string, $utf8_to_iso_8859_2);
841
			} elseif ($charset_out == 'ISO-8859-3') {
842
				$string = strtr($string, $utf8_to_iso_8859_3);
843
			} elseif ($charset_out == 'ISO-8859-4') {
844
				$string = strtr($string, $utf8_to_iso_8859_4);
845
			} elseif ($charset_out == 'ISO-8859-5') {
846
				$string = strtr($string, $utf8_to_iso_8859_5);
847
			} elseif ($charset_out == 'ISO-8859-6') {
848
				$string = strtr($string, $utf8_to_iso_8859_6);
849
			} elseif ($charset_out == 'ISO-8859-7') {
850
				$string = strtr($string, $utf8_to_iso_8859_7);
851
			} elseif ($charset_out == 'ISO-8859-8') {
852
				$string = strtr($string, $utf8_to_iso_8859_8);
853
			} elseif ($charset_out == 'ISO-8859-9') {
854
				$string = strtr($string, $utf8_to_iso_8859_9);
855
			} elseif ($charset_out == 'ISO-8859-10') {
856
				$string = strtr($string, $utf8_to_iso_8859_10);
857
			} elseif ($charset_out == 'ISO-8859-11') {
858
				$string = strtr($string, $utf8_to_iso_8859_11);
859
			}
860
			else {
861
				$string=mb_convert_encoding_wrapper($string, $charset_out, $charset);
862
			}
863
		}
864
		else {
865
			$string=mb_convert_encoding_wrapper($string, $charset_out, $charset);
866
		}
867
		
868
	return $string;
869
}
870
	
871
// translate any latin/greek/cyrillic html-entities to their plain 7bit equivalents
872
// and numbered-entities into hex
873
function entities_to_7bit($string) {
874
	require(WB_PATH.'/framework/convert.php');
875
	$string = strtr($string, $conversion_array);
876
	$string = preg_replace('/&#([0-9]+);/e', "dechex('$1')",  $string);
877
	return($string);
878
}
879

  
880 383
// Function to convert a page title to a page filename
881 384
function page_filename($string) {
882
	$string = entities_to_7bit(umlauts_to_entities($string));
385
	//init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) to be loaded more than once
386
	init_utf8funcs();
387
	$string = entities_to_7bit($string);
883 388
	// Now replace spaces with page spacer
884 389
	$string = trim($string);
885 390
	$string = preg_replace('/(\s)+/', PAGE_SPACER, $string);
......
903 408

  
904 409
// Function to convert a desired media filename to a clean filename
905 410
function media_filename($string) {
906
	$string = entities_to_7bit(umlauts_to_entities($string));
411
	//init utf8-functions -- workaround to prevent functions-utf8.php and charsets_table.php (~140kB) to be loaded more than once
412
	init_utf8funcs();
413
	$string = entities_to_7bit($string);
907 414
	// Now remove all bad characters
908 415
	$bad = array(
909 416
	'\'', // '

Also available in: Unified diff