00001 <?php
00002 # Copyright (C) 2004 Brion Vibber <brion@pobox.com>
00003 # http://www.mediawiki.org/
00004 #
00005 # This program is free software; you can redistribute it and/or modify
00006 # it under the terms of the GNU General Public License as published by
00007 # the Free Software Foundation; either version 2 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00013 # GNU General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU General Public License along
00016 # with this program; if not, write to the Free Software Foundation, Inc.,
00017 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 # http://www.gnu.org/copyleft/gpl.html
00019
/**
 * Search engine backend that builds CONTAINS() queries against the
 * searchindex table (columns si_page, si_title, si_text) joined to page.
 *
 * State such as $this->limit, $this->offset, $this->namespaces and
 * $this->showRedirects is read here but not set in this class; it is
 * presumably provided by the SearchEngine parent - confirm against it.
 */
class SearchOracle extends SearchEngine {

	/**
	 * Query-language keywords. A search term that equals one of these
	 * (compared after strtoupper()) is wrapped in braces by escapeTerm()
	 * so that it is treated as a plain word. Stored as keys so the lookup
	 * is a cheap isset().
	 */
	private $reservedWords = array(
		'ABOUT' => 1,
		'ACCUM' => 1,
		'AND' => 1,
		'BT' => 1,
		'BTG' => 1,
		'BTI' => 1,
		'BTP' => 1,
		'FUZZY' => 1,
		'HASPATH' => 1,
		'INPATH' => 1,
		'MINUS' => 1,
		'NEAR' => 1,
		'NOT' => 1,
		'NT' => 1,
		'NTG' => 1,
		'NTI' => 1,
		'NTP' => 1,
		'OR' => 1,
		'PT' => 1,
		'RT' => 1,
		'SQE' => 1,
		'SYN' => 1,
		'TR' => 1,
		'TRSYN' => 1,
		'TT' => 1,
		'WITHIN' => 1
	);

	/**
	 * @param $db Database wrapper used to build and run the search queries
	 */
	public function __construct( $db ) {
		$this->db = $db;
	}

	/**
	 * Run a search against the page text column (si_text).
	 *
	 * @param $term String: raw search term
	 * @return SqlSearchResultSet An empty-term search yields a result set
	 *   built from (false, '') without touching the database.
	 */
	public function searchText( $term ) {
		if ( $term == '' ) {
			return new SqlSearchResultSet( false, '' );
		}

		// getQuery() must run before $this->searchTerms is read below:
		// parseQuery() fills it in as a side effect.
		$sql = $this->getQuery( $this->filter( $term ), true );
		$resultSet = $this->db->resultObject( $this->db->query( $sql ) );
		return new SqlSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * Run a search against the page title column (si_title).
	 *
	 * @param $term String: raw search term
	 * @return SqlSearchResultSet An empty-term search yields a result set
	 *   built from (false, '') without touching the database.
	 */
	public function searchTitle( $term ) {
		if ( $term == '' ) {
			return new SqlSearchResultSet( false, '' );
		}

		// getQuery() must run before $this->searchTerms is read below:
		// parseQuery() fills it in as a side effect.
		$sql = $this->getQuery( $this->filter( $term ), false );
		$resultSet = $this->db->resultObject( $this->db->query( $sql ) );
		// Same result class as searchText() and as the empty-term branch
		// above; this backend should not depend on a MySQL-specific class.
		return new SqlSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * SQL fragment restricting results to non-redirect pages.
	 *
	 * @return String: empty when $this->showRedirects is truthy, otherwise
	 *   an "AND page_is_redirect=0" condition
	 */
	public function queryRedirect() {
		return $this->showRedirects ? '' : 'AND page_is_redirect=0';
	}

	/**
	 * SQL fragment restricting results to the selected namespaces.
	 *
	 * @return String: empty when $this->namespaces is null (no restriction);
	 *   otherwise an "AND page_namespace IN (...)" condition. An empty
	 *   namespace list falls back to namespace 0.
	 */
	public function queryNamespaces() {
		if ( is_null( $this->namespaces ) ) {
			return '';
		}

		$namespaces = count( $this->namespaces )
			? $this->db->makeList( $this->namespaces )
			: '0';

		return 'AND page_namespace IN (' . $namespaces . ')';
	}

	/**
	 * Apply $this->limit / $this->offset to a query via the database
	 * wrapper's limitResult().
	 *
	 * @param $sql String: query to limit
	 * @return String
	 */
	public function queryLimit( $sql ) {
		return $this->db->limitResult( $sql, $this->limit, $this->offset );
	}

	/**
	 * ORDER BY clause for the query. score(1) refers to the label 1 passed
	 * as the third argument of CONTAINS() in parseQuery().
	 *
	 * @param $filteredTerm String: unused here
	 * @param $fulltext Boolean: unused here
	 * @return String
	 */
	public function queryRanking( $filteredTerm, $fulltext ) {
		return ' ORDER BY score(1)';
	}

	/**
	 * Assemble the complete search query: main SELECT, redirect filter,
	 * namespace filter and ranking, then the row limit.
	 *
	 * @param $filteredTerm String: search term after filter()
	 * @param $fulltext Boolean: true to search text, false to search titles
	 * @return String
	 */
	public function getQuery( $filteredTerm, $fulltext ) {
		$sql = $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
			$this->queryRedirect() . ' ' .
			$this->queryNamespaces() . ' ' .
			$this->queryRanking( $filteredTerm, $fulltext ) . ' ';

		return $this->queryLimit( $sql );
	}

	/**
	 * Pick which searchindex column to match against.
	 *
	 * @param $fulltext Boolean
	 * @return String: 'si_text' when $fulltext is truthy, else 'si_title'
	 */
	public function getIndexField( $fulltext ) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Build the SELECT ... FROM ... WHERE part of the search query, up to
	 * and including the CONTAINS() condition from parseQuery().
	 *
	 * @param $filteredTerm String: search term after filter()
	 * @param $fulltext Boolean: true to search text, false to search titles
	 * @return String
	 */
	public function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery( $filteredTerm, $fulltext );
		$page = $this->db->tableName( 'page' );
		$searchindex = $this->db->tableName( 'searchindex' );

		return 'SELECT page_id, page_namespace, page_title ' .
			"FROM $page,$searchindex " .
			'WHERE page_id=si_page AND ' . $match;
	}

	/**
	 * Translate the user's search string into a CONTAINS() condition.
	 *
	 * Each word or double-quoted phrase becomes one operand; a leading '-'
	 * joins it with ' ~', anything else with ' & '. As a side effect,
	 * $this->searchTerms is reset and filled with one regular-expression
	 * fragment per matched term (used for the result set's term list).
	 *
	 * @param $filteredText String: search term after filter()
	 * @param $fulltext Boolean: true to search text, false to search titles
	 * @return String: " CONTAINS(<field>, <quoted expression>, 1) > 0 "
	 */
	public function parseQuery( $filteredText, $fulltext ) {
		global $wgContLang;
		// Deliberately the base class's character set: the override in this
		// class adds '"', which the pattern below handles as a phrase
		// delimiter instead.
		$lc = SearchEngine::legalSearchChars();
		$this->searchTerms = array();

		# FIXME: This doesn't handle parenthetical expressions.
		$m = array();
		$searchon = '';
		// Groups: 1 = optional modifier, 2 = whole term (word with optional
		// '*' or quoted phrase), 3 = bare word, 4 = trailing '*'.
		// Groups 3 and 4 are absent from the match for quoted phrases.
		if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
			$filteredText, $m, PREG_SET_ORDER ) ) {
			foreach ( $m as $terms ) {
				$operator = ( $terms[1] == '-' ) ? ' ~' : ' & ';

				// When an array of variants comes back, each distinct
				// variant is added as its own operand.
				$variants = $wgContLang->autoConvertToAllVariants( $terms[2] );
				if ( is_array( $variants ) ) {
					$variants = array_unique( array_values( $variants ) );
					foreach ( $variants as $variant ) {
						$searchon .= $operator . $this->escapeTerm( $variant );
					}
				} else {
					$searchon .= $operator . $this->escapeTerm( $terms[2] );
				}

				// Explicit string test rather than empty(): empty( '0' ) is
				// true, which would send the word "0" (and "0*") down the
				// phrase branch and lose the wildcard expansion.
				$word = isset( $terms[3] ) ? $terms[3] : '';
				if ( $word !== '' ) {
					$regexp = preg_quote( $word, '/' );
					if ( !empty( $terms[4] ) ) {
						// Trailing '*': also match any word continuation.
						$regexp .= "[0-9A-Za-z_]+";
					}
				} else {
					// Quoted phrase: drop the quotes and match literally.
					$regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' );
				}
				$this->searchTerms[] = $regexp;
			}
		}

		// Strip the leading ' & ' left by the first operand, then quote the
		// whole expression as an SQL string literal.
		$searchon = $this->db->addQuotes( ltrim( $searchon, ' &' ) );
		$field = $this->getIndexField( $fulltext );
		return " CONTAINS($field, $searchon, 1) > 0 ";
	}

	/**
	 * Prepare a single term for inclusion in the CONTAINS() expression.
	 *
	 * Steps, in order: language-specific normalisation; wrap reserved
	 * words in braces; turn a fully double-quoted phrase into a
	 * parenthesised group; backslash-escape '-', '&' and '|'.
	 *
	 * @param $t String: one word or quoted phrase
	 * @return String
	 */
	private function escapeTerm( $t ) {
		global $wgContLang;
		$t = $wgContLang->normalizeForSearch( $t );
		if ( isset( $this->reservedWords[strtoupper( $t )] ) ) {
			$t = '{' . $t . '}';
		}
		$t = preg_replace( '/^"(.*)"$/', '($1)', $t );
		$t = preg_replace( '/([-&|])/', '\\\\$1', $t );
		return $t;
	}

	/**
	 * Create or replace the searchindex row for a page, then call
	 * ctx_ddl.sync_index for the si_text_idx and si_title_idx indexes.
	 *
	 * @param $id Integer: page id, stored in si_page
	 * @param $title String: stored in si_title
	 * @param $text String: stored in si_text
	 */
	public function update( $id, $title, $text ) {
		$dbw = wfGetDB( DB_MASTER );
		$row = array(
			'si_page' => $id,
			'si_title' => $title,
			'si_text' => $text
		);
		$dbw->replace( 'searchindex', array( 'si_page' ), $row, 'SearchOracle::update' );

		$dbw->query( "CALL ctx_ddl.sync_index('si_text_idx')" );
		$dbw->query( "CALL ctx_ddl.sync_index('si_title_idx')" );
	}

	/**
	 * Update only the si_title column of a page's searchindex row.
	 *
	 * NOTE(review): unlike update(), this does not call
	 * ctx_ddl.sync_index('si_title_idx') - confirm whether the title index
	 * is expected to be synchronised elsewhere.
	 *
	 * @param $id Integer: page id (si_page) of the row to update
	 * @param $title String: new value for si_title
	 */
	public function updateTitle( $id, $title ) {
		$dbw = wfGetDB( DB_MASTER );

		$dbw->update( 'searchindex',
			array( 'si_title' => $title ),
			array( 'si_page' => $id ),
			'SearchOracle::updateTitle',
			array() );
	}

	/**
	 * Character-class contents accepted in search terms: the parent's set
	 * with the double quote prepended.
	 *
	 * @return String
	 */
	public static function legalSearchChars() {
		return "\"" . parent::legalSearchChars();
	}
}