| 1 | <?php
 | 
  
    | 2 | /**
 | 
  
    | 3 |  * @category        ISTeasy
 | 
  
    | 4 |  * @package         DatabaseSearchEngine 1
 | 
  
    | 5 |  * @author          Werner von der Decken
 | 
  
    | 6 |  * @copyright       2011, ISTeasy-project
 | 
  
    | 7 |  * @license         http://www.gnu.org/licenses/gpl.html
 | 
  
    | 8 |  * @version         $Id: DseTwo.php 1499 2011-08-12 11:21:25Z DarkViper $
 | 
  
    | 9 |  * @filesource		$HeadURL: svn://isteam.dynxs.de/wb-archiv/branches/2.8.x/wb/framework/DseTwo.php $
 | 
  
    | 10 |  * @description     Search engine to browse the whole database for text.
 | 
  
    | 11 |  *                  Black- or whitelist is possible
 | 
  
    | 12 |  *                  min requirements: PHP 5.2.2, mySQL 5.1
 | 
  
    | 13 |  *                  this is an authorised GPL-licensed derivative of the original
 | 
  
    | 14 |  *                  ISTeasy class DseOne which is available under a cc-by-sa-3.0 license
 | 
  
    | 15 | */
 | 
  
    | 16 | /* -------------------------------------------------------- */
 | 
  
    | 17 | // Must include code to stop this file being accessed directly
 | 
  
    // Refuse direct access: this file is only usable from inside the framework,
    // which defines WB_PATH before including it.
    if (defined('WB_PATH') === false) {
        $sGuardHandlerFile = dirname(__FILE__).'/globalExceptionHandler.php';
        require_once $sGuardHandlerFile;
        throw new IllegalFileException();
    }
 | 
  
    | 22 | /* -------------------------------------------------------- */
 | 
  
    | 23 | 
 | 
  
    /**
     * Scans the text columns of all tables carrying a given prefix for
     * references to files of a directory and reports which files are
     * used / unused. Table queries and per-directory results can be cached
     * as serialized files inside a cache directory.
     *
     * Configuration happens through magic properties, see __set().
     */
    class DseTwo {

        const USE_ALL       = 0;
        const USE_BLACKLIST = 1;
        const USE_WHITELIST = 2;

        const RETURN_UNUSED = 0;
        const RETURN_USED   = 1;

        /**
         * @var mixed database handle: a mysql link resource or a mysqli object
         */
        private $_db;
        /**
         * @var string prefix of tables to search for
         */
        private $_TablePrefix = '';
        /**
         * @var string name of the database
         */
        private $_db_name = '';
        /**
         * @var array control list entries (prefixed 'table' or 'table.column')
         */
        private $_ControllList = array();
        /**
         * @var int one of USE_ALL / USE_BLACKLIST / USE_WHITELIST
         */
        private $_ControllListTyp = self::USE_ALL;
        /**
         * @var array cache file suffixes, indexed by the USE_* constants
         */
        private $_ControllListTypen = array('All','BlackList','WhiteList');
        /**
         * @var array sprintf() templates, one COUNT(*) query per table
         */
        private $_Queries = array();
        private $_BasePath = '';
        private $_CachePath = '';
        private $_TCacheFile = '';
        private $_DCachePrefix = '';
        /**
         * @var bool stays false until a usable cache_dir has been set, so no
         *           cache file is ever written relative to an empty path
         */
        private $_bUseCache = false;

        /**
         * Initialises the cache file names and empty lists. All settings
         * (db handle, paths, ...) are supplied afterwards through __set().
         */
        public function __construct()
        {
            $this->_ControllList = array();
            $this->_TCacheFile   = 'Ie'.__CLASS__.'CacheTables';
            $this->_DCachePrefix = 'Ie'.__CLASS__.'CacheDir';
            $this->_Queries      = array();
        }

        /**
         * Property style configuration.
         *
         * @param string $name name of the property (case-insensitive):
         *        db_handle, db_name, table_prefix, base_dir, cache_dir, use_cache
         * @param mixed $value value of the property
         * @throws InvalidArgumentException on an unknown property name
         */
        public function __set($name, $value)
        {
            switch (strtolower($name)) {
                case 'db_handle':
                    if ($value) { $this->_db = $value; }
                    break;
                case 'db_name':
                    if ($value != '') { $this->_db_name = $value; }
                    break;
                case 'table_prefix':
                    if ($value != '') { $this->_TablePrefix = $value; }
                    break;
                case 'base_dir':
                    if ($value != '') {
                        $this->_BasePath = rtrim(str_replace('\\', '/', $value), '/');
                    }
                    break;
                case 'cache_dir':
                    // caching is enabled only if the directory exists (or can
                    // be created) and is writable
                    $value = rtrim(str_replace('\\', '/', $value), '/');
                    $bUsable = ($value != '')
                            && (is_dir($value) || mkdir($value, 0777, true))
                            && is_writable($value);
                    $this->_CachePath = $bUsable ? $value : '';
                    $this->_bUseCache = $bUsable;
                    break;
                case 'use_cache':
                    // switching the cache on requires a valid cache_dir
                    $this->_bUseCache = ($value && $this->_CachePath != '');
                    break;
                default:
                    throw new InvalidArgumentException(__CLASS__.'::'.$name);
            }
        }

        /**
         * Deletes all table cache files and forgets the queries held in memory.
         *
         * @param bool $bIncludeDirCaches also delete the per-directory result
         *        caches written by getMatchesFromDir(). Those are validated
         *        against the directory listing only, so they must be dropped
         *        when the database content has changed.
         */
        public function clearCache($bIncludeDirCaches = false)
        {
            $this->_Queries = array();
            if ($this->_CachePath == '') { return; } // never touch files outside a cache dir
            foreach ($this->_ControllListTypen as $type) {
                $cFile = $this->_CachePath.'/'.$this->_TCacheFile.$type;
                if (file_exists($cFile)) { @unlink($cFile); }
            }
            if ($bIncludeDirCaches) {
                $aFiles = glob($this->_CachePath.'/'.$this->_DCachePrefix.'*');
                if (is_array($aFiles)) {
                    foreach ($aFiles as $cFile) {
                        if (is_file($cFile)) { @unlink($cFile); }
                    }
                }
            }
        }

        /**
         * Loads a control list file. Every non-empty line that does not start
         * with '#' or ';' (after trimming) names a 'table' or 'table.column'
         * without table prefix; the prefix is added on loading.
         *
         * @param string $sControllList path/filename of the control list
         * @param int $type const USE_ALL / USE_BLACKLIST / USE_WHITELIST
         * @return bool false if no or empty list is available
         */
        public function addControllList($sControllList, $type = self::USE_BLACKLIST)
        {
            if (!isset($this->_ControllListTypen[$type])) { $type = self::USE_ALL; }
            $this->_ControllList    = array();
            $this->_ControllListTyp = $type;
            // queries built for a previous list are no longer valid
            $this->_Queries = array();
            if (is_readable($sControllList)) {
                $aLines = file($sControllList, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
                if ($aLines !== false) {
                    foreach ($aLines as $sLine) {
                        $sEntry = trim($sLine);
                        if ($sEntry === '' || $sEntry[0] == '#' || $sEntry[0] == ';') { continue; }
                        $this->_ControllList[] = $this->_TablePrefix.$sEntry;
                    }
                }
            } else {
                $this->_ControllListTyp = self::USE_ALL;
            }
            // an EMPTY blacklist excludes nothing, which equals USE_ALL
            if (($type == self::USE_BLACKLIST) && (sizeof($this->_ControllList) == 0)) {
                $this->_ControllListTyp = self::USE_ALL;
            }
            return (sizeof($this->_ControllList) > 0);
        }

        /**
         * Lists the files of a directory (sorted by name, 'index.php' and
         * subdirectories skipped) which are / are not referenced in the database.
         *
         * NOTE: a cached result is reused as long as the directory listing is
         * unchanged; changes made to the database only are not detected, use
         * clearCache(true) to force a fresh scan.
         *
         * @param string $sDirToSearch directory to scan (relative to base_dir)
         * @param integer $bRetunMode RETURN_USED or RETURN_UNUSED
         * @return array list of matching file names
         * @throws RuntimeException if a cache file can not be written or no
         *         database handle is set
         */
        public function getMatchesFromDir($sDirToSearch, $bRetunMode = self::RETURN_USED)
        {
            $sDirToSearch  = trim(str_replace('\\', '/', $sDirToSearch), '/');
            $sPathToSearch = $this->_BasePath.'/'.$sDirToSearch;
            $sCacheFile    = $this->_CachePath.'/'.$this->_DCachePrefix.$bRetunMode
                           . urlencode('/'.$sDirToSearch);
            if (sizeof($this->_Queries) <= 0) { $this->_getTableQueries(); }
            // collect all files of the target directory
            $aNewFileList = array();
            try {
                foreach (new DirectoryIterator($sPathToSearch) as $fileinfo) {
                    if ($fileinfo->isFile() &&
                        !$fileinfo->isDot() &&
                        $fileinfo->getFilename() != 'index.php') {
                        $aNewFileList[] = $fileinfo->getFilename();
                    }
                }
            } catch (UnexpectedValueException $e) {
                // a missing/unreadable directory is handled like an empty one
            }
            // iteration order is filesystem dependent; sort for a stable checksum
            sort($aNewFileList);
            $checkSum = crc32(serialize($aNewFileList));
            if ($this->_bUseCache && is_readable($sCacheFile)) {
                // cache layout: [checksum of directory listing, file, file, ...]
                // '@': a corrupt cache file must only lead to a rebuild
                $aCached = @unserialize(file_get_contents($sCacheFile));
                if (is_array($aCached) && sizeof($aCached) > 0 &&
                    array_shift($aCached) === $checkSum) {
                    return $aCached;
                }
            }
            // no valid cache: look up every file in the database
            $aResultFileList = array();
            foreach ($aNewFileList as $sFilename) {
                $iState = $this->_getMatch($sDirToSearch.'/'.$sFilename)
                        ? self::RETURN_USED
                        : self::RETURN_UNUSED;
                if ($bRetunMode == $iState) { $aResultFileList[] = $sFilename; }
            }
            if ($this->_bUseCache) {
                // store the checksum of the LISTING, this is what gets compared on read
                $aCacheData = $aResultFileList;
                array_unshift($aCacheData, $checkSum);
                if (file_put_contents($sCacheFile, serialize($aCacheData)) === false) {
                    throw new RuntimeException('unable to write file ['.$sCacheFile.']');
                }
            }
            return $aResultFileList;
        }

        /**
         * Searches all prepared table queries for a reference to a file.
         *
         * @param string $sFilename path of the file relative to base_dir
         * @return bool true if file found in db
         */
        private function _getMatch($sFilename)
        {
            // take the name literally inside LIKE, but let every '/' match any
            // single character (e.g. a backslash used as separator)
            $sSearch = '%'.str_replace('/', '_', addcslashes($sFilename, '\\%_')).'%';
            $sSearch = $this->_dbEscape($sSearch);
            foreach ($this->_Queries as $sQuery) {
                $res = $this->_dbQuery(sprintf($sQuery, $sSearch));
                if (!$res) { continue; } // a failing table is skipped
                $aRow = $this->_dbFetch($res, false);
                $this->_dbFree($res);
                // first column holds COUNT(*)
                if ($aRow && intval($aRow[0]) > 0) { return true; }
            }
            return false;
        }

        /**
         * Fills $_Queries with one 'SELECT COUNT(*) ...' sprintf() template per
         * table, covering all text/varchar(>20) columns allowed by the control
         * list. Templates are read from / written to the table cache if enabled.
         *
         * @throws RuntimeException if the cache file can not be written or no
         *         database handle is set
         */
        private function _getTableQueries()
        {
            $iType = isset($this->_ControllListTypen[$this->_ControllListTyp])
                   ? $this->_ControllListTyp
                   : self::USE_ALL;
            $sCacheFile = $this->_CachePath.'/'.$this->_TCacheFile.$this->_ControllListTypen[$iType];
            if ($this->_bUseCache && is_readable($sCacheFile)) {
                // try to read queries from cache; '@': corrupt file => rebuild
                $aCached = @unserialize(file_get_contents($sCacheFile));
                if (is_array($aCached) && sizeof($aCached) > 0) {
                    $this->_Queries = $aCached;
                    return;
                }
            }
            $this->_Queries = array();
            $sLikePrefix = addcslashes($this->_TablePrefix, '\\%_');
            $sql  = 'SELECT TABLE_NAME `table`, COLUMN_NAME `column` ';
            $sql .= 'FROM INFORMATION_SCHEMA.COLUMNS ';
            $sql .= 'WHERE `table_schema` = \''.$this->_dbEscape($this->_db_name).'\' AND ';
            $sql .=       '`table_name` LIKE \''.$this->_dbEscape($sLikePrefix).'%\' AND ';
            $sql .=       '(`data_type` LIKE \'%text\' OR ';
            $sql .=          '(`data_type` = \'varchar\' AND `character_maximum_length` > 20)';
            $sql .=       ') ';
            $sql .= 'ORDER BY `table`, `column`';
            if (($res = $this->_dbQuery($sql))) {
                $lastTable     = null;
                $sPrefix       = '';
                $aOrStatements = array();
                while (($rec = $this->_dbFetch($res, true))) {
                    // test table / table.column against the control list
                    $bListed = in_array($rec['table'], $this->_ControllList, true) ||
                               in_array($rec['table'].'.'.$rec['column'], $this->_ControllList, true);
                    if (($iType == self::USE_BLACKLIST && $bListed) ||
                        ($iType == self::USE_WHITELIST && !$bListed)) {
                        continue;
                    }
                    if ($lastTable !== $rec['table']) {
                        if (sizeof($aOrStatements) != 0) {
                            // close previous table
                            $this->_Queries[] = $sPrefix.implode(') OR (', $aOrStatements).')';
                        }
                        // start a new table
                        $sPrefix = 'SELECT COUNT(*) `count` FROM '
                                 . $this->_quoteName($rec['table']).' WHERE (';
                        $aOrStatements = array();
                        $lastTable = $rec['table'];
                    }
                    // add table.column to query, '%1$s' is replaced by the search string
                    $aOrStatements[] = $this->_quoteName($rec['table']).'.'
                                     . $this->_quoteName($rec['column']).' LIKE \'%1$s\'';
                }
                if (sizeof($aOrStatements) != 0) {
                    // close last table
                    $this->_Queries[] = $sPrefix.implode(') OR (', $aOrStatements).')';
                }
                $this->_dbFree($res);
            }
            if ($this->_bUseCache && sizeof($this->_Queries) > 0) {
                // try to write queries into the cache
                if (file_put_contents($sCacheFile, serialize($this->_Queries)) === false) {
                    throw new RuntimeException('unable to write file ['.$sCacheFile.']');
                }
            }
        }

        /**
         * Backtick-quotes an identifier for use inside a sprintf() template
         * (a literal '%' must be doubled there).
         *
         * @param string $sName table or column name
         * @return string
         */
        private function _quoteName($sName)
        {
            return '`'.str_replace(array('`', '%'), array('``', '%%'), $sName).'`';
        }

        /**
         * Tells which driver the configured handle belongs to.
         * NOTE(review): the mysql_* branch only works where ext/mysql still
         * exists (PHP < 7); pass a mysqli object on newer versions.
         *
         * @return bool true for a mysqli object, false for a mysql link
         * @throws RuntimeException if no handle has been set
         */
        private function _isMysqli()
        {
            if (!$this->_db) {
                throw new RuntimeException(__CLASS__.': no database handle set (db_handle)');
            }
            return ($this->_db instanceof mysqli);
        }

        /**
         * Runs a query on the configured handle.
         *
         * @param string $sql
         * @return mixed result set or false on failure
         */
        private function _dbQuery($sql)
        {
            if (!$this->_isMysqli()) {
                return mysql_query($sql, $this->_db);
            }
            try {
                $res = $this->_db->query($sql);
            } catch (mysqli_sql_exception $e) {
                // same best-effort contract as the mysql_* branch
                return false;
            }
            return ($res instanceof mysqli_result) ? $res : false;
        }

        /**
         * Fetches the next row of a result set.
         *
         * @param mixed $res result set returned by _dbQuery()
         * @param bool $bAssoc true: associative row, false: numeric row
         * @return mixed array, or false/null if there are no more rows
         */
        private function _dbFetch($res, $bAssoc)
        {
            if ($res instanceof mysqli_result) {
                return $bAssoc ? $res->fetch_assoc() : $res->fetch_row();
            }
            return $bAssoc ? mysql_fetch_assoc($res) : mysql_fetch_row($res);
        }

        /**
         * Releases a result set.
         *
         * @param mixed $res result set returned by _dbQuery()
         */
        private function _dbFree($res)
        {
            if ($res instanceof mysqli_result) {
                $res->free();
            } else {
                mysql_free_result($res);
            }
        }

        /**
         * Escapes a value for use inside a quoted SQL string literal.
         *
         * @param string $sValue
         * @return string
         */
        private function _dbEscape($sValue)
        {
            if ($this->_isMysqli()) {
                return $this->_db->real_escape_string($sValue);
            }
            return mysql_real_escape_string($sValue, $this->_db);
        }

    }
 | 
  
    | 321 | ?>
 |