Project

General

Profile

1
<?php
2
/**
3
 *
4
 * @category        modules
5
 * @package         output_filter
6
 * @author          Christian Sommer, WB-Project, Werner v.d. Decken
7
 * @copyright       2011, Website Baker Org. e.V.
8
 * @link			http://www.websitebaker2.org/
9
 * @license         http://www.gnu.org/licenses/gpl.html
10
 * @platform        WebsiteBaker 2.8.2
11
 * @requirements    PHP 5.2.2 and higher
12
 * @version         $Id: filter-routines.php 1534 2011-12-08 12:22:16Z darkviper $
13
 * @filesource		$HeadURL: svn://isteam.dynxs.de/wb-archiv/branches/2.8.x/wb/modules/output_filter/filter-routines.php $
14
 * @lastmodified    $Date: 2011-12-08 13:22:16 +0100 (Thu, 08 Dec 2011) $
15
 *
16
 */
17
/* -------------------------------------------------------- */
18
// Direct-access guard: load the framework's global exception handler, then
// refuse to continue when the WB_PATH constant has not been defined.
// NOTE(review): IllegalFileException is presumably declared by the required
// file - confirm against framework/globalExceptionHandler.php.
require_once dirname(dirname(dirname(__FILE__))) . '/framework/globalExceptionHandler.php';
if (defined('WB_PATH') === false) {
	throw new IllegalFileException();
}
21
/* -------------------------------------------------------- */
22

    
23
/* ************************************************************************** */
24
/**
 * Execute the frontend output filters on the rendered page content.
 *
 * Reads the module settings through getOutputFilterSettings(), publishes the
 * '@' and '.' replacement strings as constants (they are read later by the
 * regex callback _cbDoExecuteFilter()), and then applies, as configured:
 *  - the relative-URL filter (_doFilterRelUrl) when 'sys_rel' equals 1
 *  - the e-mail protection filter (_doFilterEmail) when 'email_filter'
 *    and/or 'mailto_filter' is enabled
 *
 * @param string $content actual content
 * @return string modified content
 */
	function executeFrontendOutputFilter($content) {
		// get output filter settings from database
		$filter_settings = getOutputFilterSettings();

		// build the filter mode bit mask:
		//   bit 0 (1) .. filter mail addresses in plain text
		//   bit 1 (2) .. filter mail addresses in mailto links
		// each setting is reduced to on/off so an unexpected value (e.g. 2)
		// can not leak into a neighbouring bit
		$output_filter_mode = 0;
		if(!empty($filter_settings['email_filter'])) {
			$output_filter_mode |= 1;
		}
		if(!empty($filter_settings['mailto_filter'])) {
			$output_filter_mode |= 2;
		}

		// constants can be defined only once per request: guard the define()
		// calls so a second invocation does not raise "already defined"
		if(!defined('OUTPUT_FILTER_AT_REPLACEMENT')) {
			define('OUTPUT_FILTER_AT_REPLACEMENT',
			       isset($filter_settings['at_replacement']) ? $filter_settings['at_replacement'] : '(at)');
		}
		if(!defined('OUTPUT_FILTER_DOT_REPLACEMENT')) {
			define('OUTPUT_FILTER_DOT_REPLACEMENT',
			       isset($filter_settings['dot_replacement']) ? $filter_settings['dot_replacement'] : '(dot)');
		}

/* ### filter type: full qualified URLs ##################################### */
		if(isset($filter_settings['sys_rel']) && $filter_settings['sys_rel'] == 1) {
			$content = _doFilterRelUrl($content);
		}
/* ### filter type: protect email addresses ################################# */
		if($output_filter_mode !== 0) {
			$content = _doFilterEmail($content, $output_filter_mode);
		}
/* ### end of filters ####################################################### */
		return $content;
	}
48
/* ************************************************************************** */
49
/**
 * Read the current output filter settings.
 *
 * Starts from built-in defaults and overlays the first record returned from
 * the table TABLE_PREFIX.'mod_output_filter'. When the query fails or yields
 * no record, the defaults are returned unchanged.
 *
 * @global object $database  used for field_add() and query()
 * @global object $admin     used for strip_slashes()
 * @param void
 * @return array contains all settings (sys_rel, email_filter, mailto_filter,
 *               at_replacement, dot_replacement, plus whatever else the
 *               record holds)
 */
	function getOutputFilterSettings() {
		global $database, $admin;
	// set default values
		$settings = array(
			'sys_rel'         => 0,
			'email_filter'    => 0,
			'mailto_filter'   => 0,
			'at_replacement'  => '(at)',
			'dot_replacement' => '(dot)'
		);
	// be sure field 'sys_rel' is in table
	// NOTE(review): field_add() is a project helper; presumably it is a no-op
	// when the column already exists - confirm, as this runs on every call
		$database->field_add( TABLE_PREFIX.'mod_output_filter', 'sys_rel', 'INT NOT NULL DEFAULT \'0\' FIRST');
	// request settings from database
	// (the table name is now closed with its second backtick; the previous
	//  statement ended with an unterminated identifier quote)
		$sql = 'SELECT * FROM `'.TABLE_PREFIX.'mod_output_filter`';
		if(($res = $database->query($sql))) {
			$rec = $res->fetchRow();
			if($rec && is_array($rec)) {
			// overlay the record on the defaults so a missing column can not
			// remove a key the callers rely on
				$settings = array_merge($settings, $rec);
				$settings['at_replacement']  = $admin->strip_slashes($settings['at_replacement']);
				$settings['dot_replacement'] = $admin->strip_slashes($settings['dot_replacement']);
			}
		}
	// return array with filter settings
		return $settings;
	}
80
/* ************************************************************************** */
81
/**
 * Convert full qualified, local URLs into relative URLs.
 *
 * Every double-quoted href="..." / src="..." attribute value is inspected by
 * _cbDoFilterRelUrl(); URLs pointing to this installation (see WB_URL) are
 * rewritten to a root-relative form, everything else is left untouched.
 *
 * @param string $content
 * @return string
 */
	function _doFilterRelUrl($content) {
		$filtered = preg_replace_callback(
			'/((?:href|src)\s*=\s*")([^\"]*?)(")/iU',
			'_cbDoFilterRelUrl',
			$content
		);
		// preg_replace_callback() returns NULL on a PCRE error (e.g. backtrack
		// limit); deliver the unfiltered page rather than an empty one
		return ($filtered === null) ? $content : $filtered;
	}
/**
 * callback-function for function _doFilterRelUrl()
 *
 * Named callback instead of create_function(), which is deprecated since
 * PHP 7.2 and removed in PHP 8.0.
 *
 * @param array $matches [0] whole attribute, [1] 'href="' or 'src="' part,
 *                       [2] the URL, [3] closing quote
 * @return string the rewritten attribute, or $matches[0] when not local
 */
	function _cbDoFilterRelUrl($matches) {
		$host = parse_url($matches[2], PHP_URL_HOST);
		// no host: already relative (NULL) or not parseable (false)
		if(!is_string($host) || $host == '') {
			return $matches[0];
		}
		// NOTE(review): this is a substring test, so a host that merely occurs
		// inside WB_URL is treated as local too; kept as is because
		// installations may rely on it (e.g. with/without subdomain)
		if(stripos(WB_URL, $host) === false) {
			return $matches[0];
		}
		$parts = parse_url($matches[2]);
		if(!is_array($parts)) {
			return $matches[0];
		}
		// collapse duplicate slashes in the path only, so query strings and
		// fragments containing '//' (e.g. ?to=http://...) stay intact
		$path     = isset($parts['path']) ? ltrim(str_replace('//', '/', $parts['path']), '/') : '';
		$query    = isset($parts['query']) ? '?'.$parts['query'] : '';
		$fragment = isset($parts['fragment']) ? '#'.$parts['fragment'] : '';
		return $matches[1].'/'.$path.$query.$fragment.$matches[3];
	}
104
/* ************************************************************************** */
105
/**
 * Protect email addresses (replace '@' and '.' and obfuscate address).
 *
 * Detects whether the decryption script mdcr.js is referenced inside <head>;
 * if not, tries to insert a reference to the module's own js/mdcr.js right
 * before the first closing head tag. The resulting mode is published as the
 * constant OUTPUT_FILTER_MODE and every address found in $content is handed
 * to the callback _cbDoExecuteFilter().
 *
 * @param string $content
 * @param int    $output_filter_mode bit mask: 1 = text addresses, 2 = mailto links
 * @return string
 */
	function _doFilterEmail($content, $output_filter_mode) {
	// test if js-decryption is installed
		if( preg_match('/<head.*<.*src=\".*\/mdcr.js.*>.*<\/head/siU', $content) ) {
			$output_filter_mode |= 4; // n | 2^2
		}else {
		// try to insert js-decrypt into <head> if available
			$modulePath = str_replace('\\', '/', dirname(__FILE__));
			$scriptFile = $modulePath.'/js/mdcr.js';
			$rootPath   = rtrim(str_replace('\\', '/', WB_PATH), '/');
			// the browser needs a URL, not the server's filesystem path:
			// map the module directory below WB_PATH onto WB_URL
			if(is_readable($scriptFile) && $rootPath != '' && strpos($modulePath, $rootPath) === 0) {
				$scriptUrl  = WB_URL.substr($modulePath, strlen($rootPath)).'/js/mdcr.js';
				$scriptLink = '<script src="'.$scriptUrl.'" type="text/javascript"></script>';
				// insert in front of the first </head>; done by offset so the
				// link is never interpreted as a regex replacement string
				if(preg_match('/<\s*\/\s*head\s*>/i', $content, $head, PREG_OFFSET_CAPTURE)) {
					$content = substr_replace($content, $scriptLink, $head[0][1], 0);
					// enable JS encryption only if the script really was inserted
					$output_filter_mode |= 4; // n | 2^2
				}
			}
		}
	// define some constants so we do not call the database in the callback functions again
	// (a constant can be defined only once per request; a later call with
	//  another mode keeps the first value)
		if(!defined('OUTPUT_FILTER_MODE')) {
			define('OUTPUT_FILTER_MODE', (int)$output_filter_mode);
		}
/* *** obfuscate mailto addresses by js:mdcr *** */
		// work out the defined output filter mode: possible output filter modes: [0], 1, 2, 3, 6, 7
		// 2^0 * (0.. disable, 1.. enable) filtering of mail addresses in text
		// 2^1 * (0.. disable, 1.. enable) filtering of mail addresses in mailto links
		// 2^2 * (0.. disable, 1.. enable) Javascript mailto encryption (only if mailto filtering enabled)

		// first search part to find all mailto email addresses
		$pattern = '#(<a[^<]*href\s*?=\s*?"\s*?mailto\s*?:\s*?)([A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4})([^"]*?)"([^>]*>)(.*?)</a>';
		// second part to find all non mailto email addresses
		// (the quote alternatives are grouped, so value=' is recognised as
		//  well as value=" ; the number of capturing groups is unchanged)
		$pattern .= '|(value\s*=\s*(?:"|\'))??\b([A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4})\b#i';
/*
	Sub 1: (<a[^<]*href\s*?=\s*?"\s*?mailto\s*?:\s*?)   --> '<a id="yyy" class="xxx" href = " mailto :' ignoring white spaces
	Sub 2: ([A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}) --> the email address in the mailto: part of the mail link
	Sub 3: ([^"]*?)"                                    --> possible ?Subject&cc... stuff attached to the mail address
	Sub 4: ([^>]*>)                                     --> all class or id statements after the mailto but before closing ..>
	Sub 5: (.*?)</a>                                    --> the mailto text; all characters between >xxxxx</a>
	Sub 6: (value\s*=\s*(?:"|\'))??                     --> optional 'value=' prefix of an input tag attribute
	Sub 7: \b(...)\b                                    --> email addresses which may appear in the text (require word boundaries)
	A mailto hit therefore delivers 6 elements to the callback, a text hit 8.
*/
		// find all email addresses embedded in the content and filter them using a callback function
		$filtered = preg_replace_callback($pattern, '_cbDoExecuteFilter', $content);
		// NULL signals a PCRE error; return the content as it is instead of an empty page
		return ($filtered === null) ? $content : $filtered;
	}
149
/* ************************************************************************** */
150
/**
 * callback-function for function _doFilterEmail() to proceed search results
 *
 * Works on one hit of the pattern built in _doFilterEmail(). The number of
 * elements in $match tells which alternative matched:
 *  - 8 elements: address in plain text ([6] optional 'value=' prefix, [7] address)
 *  - 6 elements: mailto link ([1] link start, [2] address, [3] subject etc.,
 *                [4] rest of the opening tag, [5] link text)
 * Requires the constants OUTPUT_FILTER_MODE, OUTPUT_FILTER_AT_REPLACEMENT and
 * OUTPUT_FILTER_DOT_REPLACEMENT; without them the hit is returned unchanged.
 *
 * @param array $match results from preg_replace_callback
 * @return string proceeded replacement string
 */
	function _cbDoExecuteFilter($match) {
		// check if required arguments are defined
		if(!( defined('OUTPUT_FILTER_MODE')
		      && defined('OUTPUT_FILTER_AT_REPLACEMENT')
		      && defined('OUTPUT_FILTER_DOT_REPLACEMENT')
		    ) ) {
			return $match[0];
		}
		$search = array('@', '.');
		$replace = array(OUTPUT_FILTER_AT_REPLACEMENT, OUTPUT_FILTER_DOT_REPLACEMENT);
		// check if the match contains the expected number of subpatterns (6|8)
		switch (count($match)) {
			case 8:
			/** OUTPUT FILTER FOR EMAIL ADDRESSES EMBEDDED IN TEXT **/
			// 1.. text mails only, 3.. text mails + mailto (no JS), 7 text mails + mailto (JS)
				if(!in_array(OUTPUT_FILTER_MODE, array(1,3,7))) { return $match[0]; }
				// do not filter mail addresses included in input tags (<input ... value = "test@mail)
				if(strpos($match[6], 'value') !== false) { return $match[0]; }
				// filtering of non mailto email addresses enabled
				return str_replace($search, $replace, $match[0]);
			case 6:
			/** OUTPUT FILTER FOR EMAIL ADDRESSES EMBEDDED IN MAILTO LINKS **/
			// 2.. mailto only (no JS), 3.. text mails + mailto (no JS), 6.. mailto only (JS), 7.. all filters active
				if(!in_array(OUTPUT_FILTER_MODE, array(2,3,6,7))) { return $match[0]; }
				// check if last part of the a href link: >xxxx</a> contains a email address we need to filter
				$pattern = '#[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}#i';
				if(preg_match_all($pattern, $match[5], $matches)) {
					foreach($matches as $submatch) {
						foreach($submatch as $value) {
						// replace all . and all @ in email address parts by (dot) and (at) strings
							$match[5] = str_replace($value, str_replace($search, $replace, $value), $match[5]);
						}
					}
				}
				// check if Javascript encryption routine is enabled
				if(in_array(OUTPUT_FILTER_MODE, array(6,7))) {
				/** USE JAVASCRIPT ENCRYPTION FOR MAILTO LINKS **/
				// extract possible class and id attribute from ahref link
					preg_match('/class\s*?=\s*?("|\')(.*?)\1/ix', $match[0], $class_attr);
					$class_attr = empty($class_attr) ? '' : 'class="' . $class_attr[2] . '" ';
					preg_match('/id\s*?=\s*?("|\')(.*?)\1/ix', $match[0], $id_attr);
					$id_attr = empty($id_attr) ? '' : 'id="' . $id_attr[2] . '" ';
				// preprocess mailto link parts for further usage:
				// special characters become upper case place holders, which can
				// not collide with the lower cased letters of the address
					$email_address = str_replace(
						array('@', '.', '_', '-'),
						array('F', 'Z', 'X', 'K'),
						strtolower($match[2])
					);
					$email_subject = rawurlencode(html_entity_decode($match[3]));
				// create a random encryption key for the Caesar cipher
				// (no mt_srand(): the generator is seeded automatically and
				//  reseeding from microtime() on every hit only weakens it)
					$shift = mt_rand(1, 25);
				// encrypt the email using an adapted Caesar cipher; the address
				// is processed back to front, so the result is reversed too
					$encrypted_email = '';
					for($i = strlen($email_address) -1; $i > -1; $i--) {
						if(preg_match('#[FZXK0-9]#', $email_address[$i])) {
						// place holders and digits are passed through unchanged
							$encrypted_email .= $email_address[$i];
						} else {
						// NOTE(review): '%' and '+' are accepted by the address
						// pattern but are not in a-z, so this formula leaves the
						// letter range for them - verify against mdcr.js
							$encrypted_email .= chr((ord($email_address[$i]) -97 + $shift) % 26 + 97);
						}
					}
				// the last character carries the shift used
					$encrypted_email .= chr($shift + 97);
				// build the encrypted Javascript mailto link
					return "<a {$class_attr}{$id_attr}href=\"javascript:mdcr('$encrypted_email','$email_subject')\">" .$match[5] ."</a>";
				}
				/** DO NOT USE JAVASCRIPT ENCRYPTION FOR MAILTO LINKS **/
				// as minimum protection, replace @ in the mailto part by (at)
				// dots are not transformed as this would transform my.name@domain.com into: my(dot)name(at)domain(dot)com
				// rebuild the mailto link from the subpatterns (add the missing characters " and </a>)
				// to protect both, @ and dots, use str_replace($search, $replace, $match[2]) here instead
				return $match[1] .str_replace('@', OUTPUT_FILTER_AT_REPLACEMENT, $match[2]) .$match[3] .'"' .$match[4] .$match[5] .'</a>';
			default:
			// number of subpatterns do not match the requirements ... do nothing
				return $match[0];
		}
	}
234
/* ************************************************************************** */
(2-2/8)