Project

General

Profile

wb-2_10_x / branches / main / modules / output_filter / filters / filterEmail.php @ 5

1
<?php
2
/**
 * Protect email addresses in the page output (replace '@' and '.' by the
 * configured replacement strings and, where enabled, obfuscate mailto links
 * through the Javascript function mdcr()).
 *
 * The behaviour is controlled by the 'OutputFilterMode' bit mask:
 *   2^0 * (0.. disable, 1.. enable) filtering of mail addresses in text
 *   2^1 * (0.. disable, 1.. enable) filtering of mail addresses in mailto links
 *   2^2 * (0.. disable, 1.. enable) Javascript mailto encryption (only if mailto filtering enabled)
 * Bit 2^2 is switched on automatically whenever js/mdcr.js is readable.
 *
 * If a regular expression fails at runtime (preg_* returns null) the content
 * is passed through unchanged instead of being replaced by null.
 *
 * @param string $content  the complete page output
 * @return string          the filtered page output
 */
    function doFilterEmail($content) {
        $aFilterSettings = getOutputFilterSettings();
        // the settings key is derived from this file's name, e.g. "filterEmail.php" --> "Email"
        $key = preg_replace('=^.*?filter([^\.\/\\\\]+)(\.[^\.]+)?$=is', '\1', __FILE__);
        // filter is switched off or not configured: nothing to do
        if (empty($aFilterSettings[$key])) {
            return $content;
        }
        // no filtering requested and the replacements would not change anything
        if (
            $aFilterSettings['OutputFilterMode'] == 0 &&
            $aFilterSettings['at_replacement'] == '@' &&
            $aFilterSettings['dot_replacement'] == '.'
        ) {
            return $content;
        }
        // set to true by the callback as soon as one mdcr() link has been generated
        $bNeedMdcr = false;
        // test if js-decryption is available (path and readability are needed again further down)
        $script = str_replace('\\', '/', str_replace(WB_PATH, '', dirname(__DIR__)).'/js/mdcr.js');
        $bScriptReadable = is_readable(WB_PATH.$script);
        // work out the defined output filter mode: possible output filter modes: [0], 1, 2, 3, 6, 7
        $iMode = (int)$aFilterSettings['OutputFilterMode'];
        if ($bScriptReadable) {
            $iMode |= 4; // n | 2^2
        }
        // 1.. text mails only, 3.. text mails + mailto (no JS), 7.. text mails + mailto (JS)
        $bFilterText   = in_array($iMode, array(1, 3, 5, 7), true);
        // 2.. mailto only (no JS), 3.. text mails + mailto (no JS), 6.. mailto only (JS), 7.. all filters active
        $bFilterMailto = in_array($iMode, array(2, 3, 6, 7), true);
        $bUseJs        = in_array($iMode, array(6, 7), true);
        if (!$bFilterText && !$bFilterMailto) {
            // neither kind of address is to be filtered, the callback would return every match untouched
            return $content;
        }
        // a single email address; the TLD may have up to 63 characters (maximum length of a DNS label),
        // otherwise addresses like name@example.museum are missed or cut off
        $sAddress = '[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,63}';
/*
        Sub 1: (<a[^<]*href\s*?=\s*?"\s*?mailto\s*?:\s*?)  --> '<a id="yyy" class="xxx" href = " mailto :' ignoring white spaces
        Sub 2: (address)                                   --> the email address in the mailto: part of the mail link
        Sub 3: ([^"]*?)"                                   --> possible ?Subject&cc... stuff attached to the mail address
        Sub 4: ([^>]*>\s*)                                 --> all class or id statements after the mailto but before closing ..>
        Sub 5: (.*?)</a>                                   --> the mailto text; all characters between >xxxxx</a>
        Sub 6: (value\s*=\s*"|\')??                        --> optional 'value="' prefix (or a single quote) in front of a text address
        Sub 7: \b(address)\b                               --> email addresses which may appear in the text (require word boundaries)
*/
            // first search part to find all mailto email addresses
        $pattern  = '#(<a[^<]*href\s*?=\s*?"\s*?mailto\s*?:\s*?)('.$sAddress.')([^"]*?)"([^>]*>\s*)(.*?)</a>';
            // second part to find all non mailto email addresses
        $pattern .= '|(value\s*=\s*"|\')??\b('.$sAddress.')\b#i';
        // find all email addresses embedded in the content and filter them using a callback function
        $sFiltered = preg_replace_callback(
            $pattern,
    /* ************************************************************************** */
            // $bNeedMdcr must be captured by reference, otherwise the flag set below is lost
            function ($match) use ($aFilterSettings, $sAddress, $bFilterText, $bFilterMailto, $bUseJs, &$bNeedMdcr) {
                $search  = array('@', '.');
                $replace = array($aFilterSettings['at_replacement'], $aFilterSettings['dot_replacement']);
                // the number of subpatterns tells which alternative matched (6.. mailto link | 8.. address in text)
                switch (count($match)) {
                    case 8:
                    /** OUTPUT FILTER FOR EMAIL ADDRESSES EMBEDDED IN TEXT **/
                        if (!$bFilterText) {
                            return $match[0];
                        }
                        // do not filter mail addresses included in input tags (<input ... value = "test@mail)
                        if (strpos($match[6], 'value') !== false) {
                            return $match[0];
                        }
                        // filtering of non mailto email addresses enabled
                        return str_replace($search, $replace, $match[0]);
                    case 6:
                    /** OUTPUT FILTER FOR EMAIL ADDRESSES EMBEDDED IN MAILTO LINKS **/
                        if (!$bFilterMailto) {
                            return $match[0];
                        }
                        // check if last part of the a href link: >xxxx</a> contains email addresses we need to filter
                        if (preg_match_all('#'.$sAddress.'#i', $match[5], $aHits)) {
                            foreach (array_unique($aHits[0]) as $sHit) {
                                // replace all . and all @ in the address by the configured replacement strings
                                $match[5] = str_replace($sHit, str_replace($search, $replace, $sHit), $match[5]);
                            }
                        }
                        if (!$bUseJs) {
                        /** DO NOT USE JAVASCRIPT ENCRYPTION FOR MAILTO LINKS **/
                        // as minimum protection, replace @ in the mailto part by the at_replacement
                        // dots are not transformed as this would transform my.name@domain.com into: my(dot)name(at)domain(dot)com
                        // rebuild the mailto link from the subpatterns (add the missing characters " and </a>)
                            return $match[1].str_replace('@', $aFilterSettings['at_replacement'], $match[2]).$match[3].'"'.$match[4].$match[5].'</a>';
                        // if you want to protect both, @ and dots, comment out the line above and remove the comment from the line below
                        // return $match[1] .str_replace($search, $replace, $match[2]) .$match[3] .'"' .$match[4] .$match[5] .'</a>';
                        }
                    /** USE JAVASCRIPT ENCRYPTION FOR MAILTO LINKS **/
                    // extract possible class and id attribute from ahref link
                        preg_match('/class\s*?=\s*?("|\')(.*?)\1/ix', $match[0], $class_attr);
                        $class_attr = empty($class_attr) ? '' : 'class="' . $class_attr[2] . '" ';
                        preg_match('/id\s*?=\s*?("|\')(.*?)\1/ix', $match[0], $id_attr);
                        $id_attr = empty($id_attr) ? '' : 'id="' . $id_attr[2] . '" ';
                    // preprocess mailto link parts for further usage:
                    // the special characters are mapped to upper case markers, everything else is lower case
                        $email_address = str_replace(
                            array('@', '.', '_', '-'),
                            array('F', 'Z', 'X', 'K'),
                            strtolower($match[2])
                        );
                        $email_subject = rawurlencode(html_entity_decode($match[3]));
                    // create a random encryption key for the Caesar cipher
                    // (mt_rand() seeds itself, reseeding the global generator for every link is not needed)
                        $shift = mt_rand(1, 25);
                    // encrypt the email using an adapted Caesar cipher, the address is written back to front
                        $encrypted_email = '';
                        for ($i = strlen($email_address) - 1; $i > -1; $i--) {
                            $sChar = $email_address[$i];
                            if (preg_match('#[FZXK0-9]#', $sChar)) {
                                // markers and digits are taken over unchanged
                                $encrypted_email .= $sChar;
                            } else {
                                // NOTE(review): '%' and '+' are allowed by the address pattern but are no
                                // lower case letters; what mdcr.js makes of them cannot be seen from here
                                $encrypted_email .= chr((ord($sChar) - 97 + $shift) % 26 + 97);
                            }
                        }
                        // the last character carries the key
                        $encrypted_email .= chr($shift + 97);
                    // build the encrypted Javascript mailto link
                        $bNeedMdcr = true;
                        return "<a {$class_attr}{$id_attr}href=\"javascript:mdcr('$encrypted_email','$email_subject')\">" .$match[5] ."</a>";
                    default:
                    // number of subpatterns do not match the requirements ... do nothing
                        return $match[0];
                }
            },
    /* ************************************************************************** */
            $content
        );
        if ($sFiltered === null) {
            // PCRE error (e.g. backtrack limit reached): better an unfiltered page than an empty one
            return $content;
        }
        $content = $sFiltered;
        // at least one mdcr() link was generated: make sure the decryption script is loaded
        if (
            $bNeedMdcr && $bScriptReadable &&
            // test if js-decryption is already referenced inside of <head>
            !preg_match('/<head.*<.*src=\".*\/mdcr.js.*>.*<\/head/siU', $content)
        ) {
            $scriptLink = "\t".'<script src="'.WB_URL.$script.'" type="text/javascript"></script>'."\n";
            // insert the script tag in front of the first </head> only; a callback is used
            // so that '$' or '\' inside of WB_URL can not be taken as a backreference
            $sPatched = preg_replace_callback(
                '/<\s*?\/\s*?head\s*>/i',
                function ($aHit) use ($scriptLink) {
                    return $scriptLink.$aHit[0];
                },
                $content,
                1
            );
            if ($sPatched !== null) {
                $content = $sPatched;
            }
        }
        return $content;
    }