00001 <?php
00002 # Copyright (C) 2004 Brion Vibber <brion@pobox.com>
00003 # http://www.mediawiki.org/
00004 #
00005 # This program is free software; you can redistribute it and/or modify
00006 # it under the terms of the GNU General Public License as published by
00007 # the Free Software Foundation; either version 2 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00013 # GNU General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU General Public License along
00016 # with this program; if not, write to the Free Software Foundation, Inc.,
00017 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 # http://www.gnu.org/copyleft/gpl.html
00019
/**
 * Search backend that turns user queries into MySQL boolean-mode fulltext
 * conditions (MATCH ... AGAINST ... IN BOOLEAN MODE) on the searchindex
 * table, and that writes the normalized title/text rows of that table.
 */
class SearchMySQL extends SearchEngine {
    /** When true, terms given without a modifier are prefixed with '+'. */
    public $strictMatching = true;

    /** Cached ft_min_word_len value (see minSearchLength()); null until first read. */
    public static $mMinSearchLength;

    /**
     * @param $db Database handle used to build and run the search queries.
     */
    public function __construct( $db ) {
        $this->db = $db;
    }

    /**
     * Parse the filtered query text into a boolean-mode fulltext condition.
     *
     * As a side effect, $this->searchTerms is reset and refilled with one
     * regex fragment per parsed term (see regexTerm()).
     *
     * @param $filteredText String: query text, already passed through filter()
     * @param $fulltext Boolean: true to match si_text, false to match si_title
     * @return String SQL fragment of the form " MATCH(field) AGAINST('...' IN BOOLEAN MODE) "
     */
    public function parseQuery( $filteredText, $fulltext ) {
        global $wgContLang;

        // This is the base class's character set, not self::legalSearchChars():
        // '"' and '*' are handled by dedicated parts of the pattern below.
        $lc = SearchEngine::legalSearchChars();
        $searchon = '';
        $this->searchTerms = array();

        # FIXME: This doesn't handle parenthetical expressions.
        $m = array();
        if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
              $filteredText, $m, PREG_SET_ORDER ) ) {
            foreach( $m as $bits ) {
                // For a quoted phrase the two trailing groups do not take part
                // in the match and are absent from $bits; pad them with ''
                // instead of silencing the resulting notices with '@'.
                list( , $modifier, $term, $nonQuoted, $wildcard ) = array_pad( $bits, 5, '' );

                $isPhrase = ( $nonQuoted === '' );
                if( $isPhrase ) {
                    $term = str_replace( '"', '', $term );
                    $quote = '"';
                } else {
                    $term = $nonQuoted;
                    $quote = '';
                }

                if( $searchon !== '' ) {
                    $searchon .= ' ';
                }
                if( $this->strictMatching && $modifier === '' ) {
                    // Terms without an explicit modifier become mandatory.
                    $modifier = '+';
                }

                // Search for every distinct writing-system variant of the
                // term that the content language reports.
                $convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
                if( is_array( $convertedVariants ) ) {
                    $variants = array_unique( array_values( $convertedVariants ) );
                } else {
                    $variants = array( $term );
                }

                // Language-specific normalization may collapse several
                // variants into one, so de-duplicate again afterwards.
                $strippedVariants = array_unique( array_map(
                    array( $wgContLang, 'normalizeForSearch' ),
                    $variants ) );

                // Several variants are wrapped in one parenthesized group
                // that shares the modifier.
                $grouped = count( $strippedVariants ) > 1;

                $searchon .= $modifier;
                if( $grouped ) {
                    $searchon .= '(';
                }
                foreach( $strippedVariants as $stripped ) {
                    $stripped = $this->normalizeText( $stripped );
                    if( !$isPhrase && strpos( $stripped, ' ' ) !== false ) {
                        // Normalization split an unquoted term into several
                        // words; quote it so it is matched as a phrase.
                        $stripped = '"' . trim( $stripped ) . '"';
                    }
                    $searchon .= "$quote$stripped$quote$wildcard ";
                }
                if( $grouped ) {
                    $searchon .= ')';
                }

                // Remember the un-normalized term as a regex fragment; it is
                // handed to the result set by searchInternal().
                $this->searchTerms[] = $this->regexTerm( $term, $wildcard );
            }
            wfDebug( __METHOD__ . ": Would search with '$searchon'\n" );
            wfDebug( __METHOD__ . ': Match with /' . implode( '|', $this->searchTerms ) . "/\n" );
        } else {
            wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
        }

        $searchon = $this->db->strencode( $searchon );
        $field = $this->getIndexField( $fulltext );
        return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) ";
    }

    /**
     * Build the regex fragment for one search term.
     *
     * @param $string String: raw term; it is escaped for use between '/' delimiters
     * @param $wildcard String: non-empty when the term ended with '*'
     * @return String escaped term, wrapped in \b word boundaries when the
     *         content language reports word breaks (leading boundary only
     *         for wildcard terms, so the match may run on past the term)
     */
    public function regexTerm( $string, $wildcard ) {
        global $wgContLang;

        $regex = preg_quote( $string, '/' );
        if( $wgContLang->hasWordBreaks() ) {
            $regex = $wildcard
                ? '\b' . $regex
                : '\b' . $regex . '\b';
        }
        // Without word breaks the escaped term is used as is.
        return $regex;
    }

    /**
     * @return String the parent's legal search characters, prefixed with '"' and '*'
     */
    public static function legalSearchChars() {
        return '"*' . parent::legalSearchChars();
    }

    /**
     * Run a search against the page text index (si_text).
     *
     * @param $term String: raw search term
     * @return MySQLSearchResultSet
     */
    public function searchText( $term ) {
        return $this->searchInternal( $term, true );
    }

    /**
     * Run a search against the title index (si_title).
     *
     * @param $term String: raw search term
     * @return MySQLSearchResultSet
     */
    public function searchTitle( $term ) {
        return $this->searchInternal( $term, false );
    }

    /**
     * Filter the term, run the search query and, when
     * $wgCountTotalSearchHits is set, a second COUNT(*) query.
     *
     * @param $term String: raw search term
     * @param $fulltext Boolean: true for si_text, false for si_title
     * @return MySQLSearchResultSet whose total is null when counting is disabled
     *         or the count query returned no row
     */
    protected function searchInternal( $term, $fulltext ) {
        global $wgCountTotalSearchHits;

        $filteredTerm = $this->filter( $term );
        $resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) );

        $total = null;
        if( $wgCountTotalSearchHits ) {
            $totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) );
            $row = $totalResult->fetchObject();
            if( $row ) {
                $total = intval( $row->c );
            }
            $totalResult->free();
        }

        return new MySQLSearchResultSet( $resultSet, $this->searchTerms, $total );
    }

    /**
     * @return String SQL condition excluding redirects, or '' when
     *         $this->showRedirects is set
     */
    public function queryRedirect() {
        return $this->showRedirects ? '' : 'AND page_is_redirect=0';
    }

    /**
     * @return String SQL condition restricting page_namespace: '' when
     *         $this->namespaces is null (search all), namespace 0 only when
     *         the list is empty, otherwise the listed namespaces
     */
    public function queryNamespaces() {
        if( is_null( $this->namespaces ) ) {
            return ''; # search all
        }
        if( !count( $this->namespaces ) ) {
            $namespaces = '0';
        } else {
            $namespaces = $this->db->makeList( $this->namespaces );
        }
        return 'AND page_namespace IN (' . $namespaces . ')';
    }

    /**
     * @return String whatever the database's limitResult() produces for an
     *         empty query with $this->limit and $this->offset
     */
    public function queryLimit() {
        return $this->db->limitResult( '', $this->limit, $this->offset );
    }

    /**
     * Ranking clause for the query; this implementation adds none.
     *
     * @param $filteredTerm String (unused here)
     * @param $fulltext Boolean (unused here)
     * @return String always ''
     */
    public function queryRanking( $filteredTerm, $fulltext ) {
        return '';
    }

    /**
     * Assemble the complete search query from its parts, in the order
     * main, redirect, namespaces, ranking, limit.
     *
     * @param $filteredTerm String
     * @param $fulltext Boolean
     * @return String SQL
     */
    public function getQuery( $filteredTerm, $fulltext ) {
        return $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
            $this->queryRedirect() . ' ' .
            $this->queryNamespaces() . ' ' .
            $this->queryRanking( $filteredTerm, $fulltext ) . ' ' .
            $this->queryLimit();
    }

    /**
     * @param $fulltext Boolean
     * @return String 'si_text' for fulltext searches, 'si_title' otherwise
     */
    public function getIndexField( $fulltext ) {
        return $fulltext ? 'si_text' : 'si_title';
    }

    /**
     * Build the SELECT ... FROM ... WHERE part of the search query, joining
     * page and searchindex on page_id=si_page.
     *
     * @param $filteredTerm String
     * @param $fulltext Boolean
     * @return String SQL
     */
    public function queryMain( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );
        return 'SELECT page_id, page_namespace, page_title ' .
            "FROM $page,$searchindex " .
            'WHERE page_id=si_page AND ' . $match;
    }

    /**
     * Build the COUNT(*) query (column alias "c") matching the same rows as
     * the search query, without ranking or limit.
     *
     * @param $filteredTerm String
     * @param $fulltext Boolean
     * @return String SQL
     */
    public function getCountQuery( $filteredTerm, $fulltext ) {
        $match = $this->parseQuery( $filteredTerm, $fulltext );
        $page = $this->db->tableName( 'page' );
        $searchindex = $this->db->tableName( 'searchindex' );
        // The explicit ' ' after $match keeps the SQL valid even if
        // parseQuery() returns a fragment without a trailing space.
        return "SELECT COUNT(*) AS c " .
            "FROM $page,$searchindex " .
            'WHERE page_id=si_page AND ' . $match . ' ' .
            $this->queryRedirect() . ' ' .
            $this->queryNamespaces();
    }

    /**
     * Create or replace the searchindex row for a page.
     *
     * Writes through wfGetDB( DB_MASTER ), not through $this->db.
     *
     * @param $id Integer: page id, stored in si_page (the replace key)
     * @param $title String: stored normalized in si_title
     * @param $text String: stored normalized in si_text
     */
    public function update( $id, $title, $text ) {
        $dbw = wfGetDB( DB_MASTER );
        $dbw->replace( 'searchindex',
            array( 'si_page' ),
            array(
                'si_page' => $id,
                'si_title' => $this->normalizeText( $title ),
                'si_text' => $this->normalizeText( $text )
            ), __METHOD__ );
    }

    /**
     * Update only the normalized title of an existing searchindex row,
     * passing the database's low-priority option to the UPDATE.
     *
     * @param $id Integer: page id (si_page)
     * @param $title String
     */
    public function updateTitle( $id, $title ) {
        $dbw = wfGetDB( DB_MASTER );

        $dbw->update( 'searchindex',
            array( 'si_title' => $this->normalizeText( $title ) ),
            array( 'si_page' => $id ),
            __METHOD__,
            array( $dbw->lowPriorityOption() ) );
    }

    /**
     * Convert text into the form stored in, and searched against, the
     * fulltext index. The same function is applied to indexed text
     * (update()) and to query terms (parseQuery()).
     *
     * @param $string String
     * @return String normalized text
     */
    public function normalizeText( $string ) {
        global $wgContLang;

        wfProfileIn( __METHOD__ );

        // Let the content language insert its own word breaks first.
        $out = $wgContLang->wordSegmentation( $string );

        // Lower-case, then replace every lead byte plus its continuation
        // bytes with 'u8' followed by the hex of those bytes.
        $out = preg_replace_callback(
            "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
            array( $this, 'stripForSearchCallback' ),
            $wgContLang->lc( $out ) );

        // Append 'u800' to words shorter than the server's ft_min_word_len,
        // presumably so that the fulltext index does not drop them.
        $minLength = $this->minSearchLength();
        if( $minLength > 1 ) {
            $n = $minLength - 1;
            $out = preg_replace(
                "/\b(\w{1,$n})\b/",
                "$1u800",
                $out );
        }

        // Replace a '.' between word characters (or before '*') with 'u82e',
        // i.e. the same u8+hex form used above, so dotted tokens stay one word.
        $dotted = preg_replace(
            "/(\w)\.(\w|\*)/u",
            "$1u82e$2",
            $out );
        // preg_replace() returns null on a PCRE error, which the /u modifier
        // raises for subjects that are not valid UTF-8 (a stray continuation
        // byte survives the armouring step). Keep the text from the previous
        // step rather than returning null for the whole string.
        if( $dotted !== null ) {
            $out = $dotted;
        }

        wfProfileOut( __METHOD__ );

        return $out;
    }

    /**
     * preg_replace_callback() handler used by normalizeText().
     *
     * @param $matches Array: $matches[1] holds the matched byte sequence
     * @return String 'u8' followed by the hex representation of those bytes
     */
    protected function stripForSearchCallback( $matches ) {
        return 'u8' . bin2hex( $matches[1] );
    }

    /**
     * Read the server's ft_min_word_len global variable through
     * wfGetDB( DB_SLAVE ). The value is cached in self::$mMinSearchLength,
     * so the query runs at most once per request.
     *
     * @return Integer the configured length, or 0 when the variable is not reported
     */
    protected function minSearchLength() {
        if( is_null( self::$mMinSearchLength ) ) {
            $sql = "SHOW GLOBAL VARIABLES LIKE 'ft\\_min\\_word\\_len'";

            $dbr = wfGetDB( DB_SLAVE );
            $result = $dbr->query( $sql );
            $row = $result->fetchObject();
            $result->free();

            if( $row && $row->Variable_name == 'ft_min_word_len' ) {
                self::$mMinSearchLength = intval( $row->Value );
            } else {
                self::$mMinSearchLength = 0;
            }
        }
        return self::$mMinSearchLength;
    }
}
00399
/**
 * SQL search result set that additionally carries the total hit count
 * supplied by its creator.
 */
class MySQLSearchResultSet extends SqlSearchResultSet {
    /** Total hit count passed to the constructor; null when none was given. */
    public $mTotalHits = null;

    /**
     * Declared as __construct() rather than as a method named after the
     * class: PHP 8 no longer treats the latter as a constructor, in which
     * case only the inherited constructor would run and the total would
     * never be stored.
     *
     * @param $resultSet Database result handed on to the parent constructor
     * @param $terms Array: search-term regex fragments, handed on to the parent
     * @param $totalHits Integer|null: total number of matches, if known
     */
    public function __construct( $resultSet, $terms, $totalHits = null ) {
        parent::__construct( $resultSet, $terms );
        $this->mTotalHits = $totalHits;
    }

    /**
     * @return Integer|null the total passed to the constructor
     */
    public function getTotalHits() {
        return $this->mTotalHits;
    }
}