Project

General

Profile

« Previous | Next » 

Revision 869

Added by thorn about 16 years ago

search: added search_lang for use in the search form. With DE, the search also matches the alternate spelling of German umlauts (ä=ae, ...)
removed undocumented word-boundary search
search_path is now anchored to the beginning of link ("link LIKE '$path%'" instead of "link LIKE '%$path%'")
added key 'ext_charset' to search-extension to query external databases

View differences:

search.php
117 117
	}
118 118
}
119 119

  
120
// Get search language
121
$search_lang = '';
122
if(isset($_REQUEST['search_lang'])) {
123
	$search_lang = $_REQUEST['search_lang'];
124
	if(!preg_match('~^[A-Z]{2}$~', $search_lang))
125
		$search_lang = LANGUAGE;
126
} else {
127
	$search_lang = LANGUAGE;
128
}
129

  
120 130
// Get the path to search into. Normally left blank
131
// ATTN: since wb2.7.1 the path is evaluated as SQL: LIKE "/path%" - which will find "/path.php", "/path/info.php", ...; But not "/de/path.php"
132
// Add a '%' in front of each path to get SQL: LIKE "%/path%"
121 133
/* possible values:
122 134
 * - a single path: "/en/" - search only pages whose link starts with 'path' ("/en/machinery/bender-x09")
123
 * - a bunch of alternative pathes: "/en/,/machinery/,docs/" - alternatives paths, seperated by comma
124
 * - a bunch of paths to exclude: "-/about,/info,/jp/,/light" - search all, exclude these.
135
 * - a single path not to search into: "-/help" - search all, exclude /help...
136
 * - a bunch of alternative paths: "/en/,%/machinery/,/docs/" - alternative paths, separated by comma
137
 * - a bunch of paths to exclude: "-/about,%/info,/jp/,/light" - search all, exclude these.
125 138
 * These different styles can't be mixed.
126 139
 */
127
$search_path_SQL = "";
128
$search_path = "";
140
// ATTN: in wb2.7.0 "/en/" matched all links with "/en/" somewhere in the link: "/info/en/intro.php", "/en/info.php", ...
141
// since wb2.7.1 "/en/" matches only links _starting_  with "/en/": "/en/intro/info.php"
142
// use "%/en/" (or "%/en/, %/info", ...) to get the old behavior
143
$search_path_SQL = '';
144
$search_path = '';
129 145
if(isset($_REQUEST['search_path'])) {
130
	$search_path = $wb->add_slashes($_REQUEST['search_path']);
131
	if(!preg_match('~^[-a-zA-Z0-9_,/ ]+$~', $search_path))
146
	$search_path = addslashes(htmlspecialchars(strip_tags($wb->strip_slashes($_REQUEST['search_path']))));
147
	if(!preg_match('~^[-a-zA-Z0-9_,/ %]+$~', $search_path)) // '%' is allowed anywhere, e.g. "/en/,%/machinery/" or "-/about,%/info"
132 148
		$search_path = '';
133 149
	if($search_path != '') {
134
		$search_path_SQL = "AND ( ";
135
		$not = "";
136
		$op = "OR";
150
		$search_path_SQL = 'AND ( ';
151
		$not = '';
152
		$op = 'OR';
137 153
		if($search_path[0] == '-') {
138
			$not = "NOT";
139
			$op = "AND";
154
			$not = 'NOT';
155
			$op = 'AND';
140 156
			$paths = explode(',', substr($search_path, 1) );
141 157
		} else {
142 158
			$paths = explode(',',$search_path);
......
144 160
		$i=0;
145 161
		foreach($paths as $p) {
146 162
			if($i++ > 0) {
147
				$search_path_SQL .= " $op";
163
				$search_path_SQL .= " $op"; // double quotes required: $op (OR/AND) must be interpolated
148 164
			}
149
			$search_path_SQL .= " link $not LIKE '%$p%'";			
165
			$search_path_SQL .= " link $not LIKE '".$p."%'";			
150 166
		}
151
		$search_path_SQL .= " )";
167
		$search_path_SQL .= ' )';
152 168
	}
153 169
}
154 170

  
......
167 183
$search_normal_string = '';
168 184
$search_entities_string = ''; // for SQL's LIKE
169 185
$search_display_string = ''; // for displaying
170
$search_url_string = ''; // for $_GET
186
$search_url_string = ''; // for $_GET -- ATTN: unquoted! Will become urldecoded later
171 187
$string = '';
172 188
if(isset($_REQUEST['string'])) {
173
	if($match!='exact') {
189
	if($match!='exact') { // $string will be cleaned below
174 190
		$string=str_replace(',', '', $_REQUEST['string']);
175 191
	} else {
176
		$string=$_REQUEST['string']; // $string will be cleaned below
192
		$string=$_REQUEST['string'];
177 193
	}
178 194
	// redo possible magic quotes
179 195
	$string = $wb->strip_slashes($string);
......
187 203
	$search_entities_string = str_replace('\\\\', '\\\\\\\\', $search_entities_string);
188 204
	// convert string to utf-8
189 205
	$string = entities_to_umlauts($string, 'UTF-8');
190
	// quote ' " and /  -we need quoted / for regex
191 206
	$search_url_string = $string;
192 207
	$string = preg_quote($string);
208
	// quote ' " and /  -we need quoted / for regex
193 209
	$search_normal_string = str_replace(array('\'','"','/'), array('\\\'','\"','\/'), $string);
194 210
}
195 211
// make arrays from the search_..._strings above
212
if($match == 'exact')
213
	$search_url_array[] = $search_url_string;
214
else
196 215
$search_url_array = explode(' ', $search_url_string);
197 216
$search_normal_array = array();
198 217
$search_entities_array = array();
......
220 239
$search_words = array();
221 240
foreach($search_normal_array AS $str) {
222 241
	$str = strtr($str, $string_ul_umlauts);
223
	// special-feature: '|' means word-boundary (\b). Searching for 'the|' will find the, but not thema.
224
	// this doesn't(?) work correctly for unicode-chars: '|test' will work, but '|über' not.
225
	$str = strtr($str, array('\\|'=>'\b'));
226 242
	$search_words[] = $str;
227 243
}
228 244

  

Also available in: Unified diff