00001 <?php
00013 class SearchEngine {
00014 var $limit = 10;
00015 var $offset = 0;
00016 var $prefix = '';
00017 var $searchTerms = array();
00018 var $namespaces = array( NS_MAIN );
00019 var $showRedirects = false;
00020
/**
 * Perform a full-text search for a term.
 *
 * This implementation does no searching and always yields null.
 * NOTE(review): presumably concrete search backends override this - confirm.
 *
 * @param $term String: raw search query (ignored here)
 * @return null
 */
function searchText( $term ) {
	// No backend at this level: nothing to search.
	return null;
}
00032
/**
 * Perform a title-only search for a term.
 *
 * This implementation does no searching and always yields null.
 * NOTE(review): presumably concrete search backends override this - confirm.
 *
 * @param $term String: raw search query (ignored here)
 * @return null
 */
function searchTitle( $term ) {
	// No backend at this level: nothing to search.
	return null;
}
00044
/**
 * Report whether this engine accepts the "list redirects" option.
 *
 * @return Boolean: always true in this implementation
 */
function acceptListRedirects() {
	return true;
}
00049
/**
 * Normalise text before it is handed to the search backend.
 *
 * This implementation is the identity: the string is returned unchanged.
 *
 * @param $string String
 * @return String: the same string
 */
public function normalizeText( $string ) {
	return $string;
}
00061
/**
 * Transform a search term before it is used.
 *
 * This implementation is the identity: the term is returned unchanged.
 *
 * @param $term String
 * @return String: the same term
 */
function transformSearchTerm( $term ) {
	return $term;
}
00069
/**
 * Find a title that closely matches the search term, if there is one.
 *
 * The lookup itself is done by getNearMatchInternal(); afterwards the
 * 'SearchGetNearMatchComplete' hook receives the term and a reference to
 * the result, so handlers can replace it.
 *
 * @param $searchterm String
 * @return Title object, or null when nothing matched
 */
public static function getNearMatch( $searchterm ) {
	$title = self::getNearMatchInternal( $searchterm );

	// $title is passed by reference so hook handlers can override it.
	$hookArgs = array( $searchterm, &$title );
	wfRunHooks( 'SearchGetNearMatchComplete', $hookArgs );

	return $title;
}
00083
/**
 * Do the actual work for getNearMatch().
 *
 * Candidates are the term itself plus, when the content language has
 * variants, every variant of it. For each candidate the following are
 * tried in order: exact title, viewable content, lower case, ucwords,
 * upper case, ucwordbreaks, then the 'SearchGetNearMatch' hook.
 * If nothing matched, a few namespace-specific fallbacks are applied
 * to the original term.
 *
 * @param $searchterm String
 * @return Title object, or null when nothing matched
 */
private static function getNearMatchInternal( $searchterm ) {
	global $wgContLang;

	$candidates = array( $searchterm );
	if ( $wgContLang->hasVariants() ) {
		$candidates = array_merge(
			$candidates,
			$wgContLang->convertLinkToAllVariants( $searchterm )
		);
	}

	// A hook handler may short-circuit the whole lookup by returning false.
	$titleResult = null;
	if ( !wfRunHooks( 'SearchGetNearMatchBefore', array( $candidates, &$titleResult ) ) ) {
		return $titleResult;
	}

	// Content-language methods producing the case variants, in the order tried.
	$caseVariants = array( 'lc', 'ucwords', 'uc', 'ucwordbreaks' );

	foreach ( $candidates as $term ) {
		# Exact match? No need to look further.
		$title = Title::newFromText( $term );
		if ( is_null( $title ) ) {
			return null;
		}

		if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal() || $title->exists() ) {
			return $title;
		}

		# See if it otherwise has content in some sane sense
		$article = MediaWiki::articleFromTitle( $title );
		if ( $article->hasViewableContent() ) {
			return $title;
		}

		# Try the case variants: all lower case, capitalized words,
		# all upper case, Word-Caps-Breaking-At-Word-Breaks
		foreach ( $caseVariants as $variant ) {
			$title = Title::newFromText( $wgContLang->$variant( $term ) );
			if ( $title && $title->exists() ) {
				return $title;
			}
		}

		# Give hook handlers a chance to supply a match for this candidate
		$title = null;
		if ( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
			return $title;
		}
	}

	$title = Title::newFromText( $searchterm );
	$ns = $title->getNamespace();

	# Entering an IP address goes to the contributions page
	$isUserIp = ( $ns == NS_USER && User::isIP( $title->getText() ) );
	if ( $isUserIp || User::isIP( trim( $searchterm ) ) ) {
		return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
	}

	# Entering a user goes to the user page whether it's there or not
	if ( $ns == NS_USER ) {
		return $title;
	}

	# Go to images that exist even if there's no local page.
	# There may have been a funny upload, or it may be on a shared
	# file repository such as Wikimedia Commons.
	if ( $ns == NS_FILE && wfFindFile( $title ) ) {
		return $title;
	}

	# MediaWiki namespace? Page may be "implied" if not customized.
	# Just return it, with caps forced as the message system likes it.
	if ( $ns == NS_MEDIAWIKI ) {
		return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
	}

	# Quoted term? Try without the quotes...
	$matches = array();
	if ( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
		return self::getNearMatch( $matches[1] );
	}

	return null;
}
00189
/**
 * Characters that are kept in a search query, written as the inside of
 * a PCRE character class (used that way by filter()).
 *
 * @return String
 */
public static function legalSearchChars() {
	// ASCII letters, digits, "_", "'", ".", the bytes 0x80-0xFF and "-".
	return "A-Za-z_'.0-9\\x80-\\xFF\\-";
}
00193
/**
 * Set the maximum number of results and the starting offset.
 * Both values are coerced to integers.
 *
 * @param $limit Integer
 * @param $offset Integer (default 0)
 */
function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
00205
/**
 * Set which namespaces the search is restricted to.
 * The value is stored as given, without validation.
 *
 * @param $namespaces Array of namespace indexes (replacePrefixes() also
 *   stores null here for the "all" keyword)
 */
function setNamespaces( $namespaces ) {
	$this->namespaces = $namespaces;
}
00215
/**
 * Interpret a leading "all:" keyword or namespace prefix in the query.
 *
 * When the query starts with the localised 'searchall' message followed
 * by a colon, $this->namespaces is set to null. Otherwise, when the text
 * before the first colon is a known namespace name, $this->namespaces
 * becomes that single namespace. In both cases the prefix is stripped
 * from the returned query, unless that would leave it blank.
 * The 'SearchEngineReplacePrefixesComplete' hook always runs last.
 *
 * @param $query String
 * @return String: the query with any recognised prefix removed
 */
function replacePrefixes( $query ) {
	global $wgContLang;

	$parsed = $query;
	$colonPos = strpos( $query, ':' );

	if ( $colonPos !== false ) {
		$allkeyword = wfMsgForContent( 'searchall' ) . ":";
		if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
			// "all:" keyword - lift the namespace restriction
			$this->namespaces = null;
			$parsed = substr( $query, strlen( $allkeyword ) );
		} else {
			$prefix = substr( $query, 0, $colonPos );
			$index = $wgContLang->getNsIndex( $prefix );
			if ( $index !== false ) {
				$this->namespaces = array( $index );
				$parsed = substr( $query, strlen( $prefix ) + 1 );
			}
		}

		// A bare prefix with nothing after it: search for the text as typed.
		if ( trim( $parsed ) == '' ) {
			$parsed = $query;
		}
	}

	wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) );

	return $parsed;
}
00250
/**
 * List the namespaces that can be searched: every content-language
 * namespace whose index is NS_MAIN or higher. The list can be altered
 * by the 'SearchableNamespaces' hook.
 *
 * @return Array: namespace index => name
 */
public static function searchableNamespaces() {
	global $wgContLang;

	$searchable = array();
	foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
		if ( $ns < NS_MAIN ) {
			// Indexes below NS_MAIN are left out.
			continue;
		}
		$searchable[$ns] = $name;
	}

	wfRunHooks( 'SearchableNamespaces', array( &$searchable ) );
	return $searchable;
}
00267
/**
 * Work out which namespaces a user wants to search by default.
 *
 * The 'searcheverything' option is honoured for everyone unless
 * $wgSearchEverythingOnlyLoggedIn is set, in which case only for
 * logged-in users. Otherwise the namespaces returned by
 * Preferences::loadOldSearchNs() are used, limited to searchable ones.
 *
 * @param $user User
 * @return Array of namespace indexes
 */
public static function userNamespaces( $user ) {
	global $wgSearchEverythingOnlyLoggedIn;

	$searcheverything = false;
	if ( !$wgSearchEverythingOnlyLoggedIn || $user->isLoggedIn() ) {
		$searcheverything = $user->getOption( 'searcheverything' );
	}

	if ( $searcheverything ) {
		return array_keys( self::searchableNamespaces() );
	}

	$preferred = Preferences::loadOldSearchNs( $user );
	$searchable = self::searchableNamespaces();

	return array_intersect( $preferred, array_keys( $searchable ) );
}
00295
/**
 * Get the snippet-size settings used for highlighting.
 *
 * The values are fixed; $user is not consulted.
 *
 * @param $user User (unused)
 * @return Array: ( context lines, context characters ) = ( 2, 75 )
 */
public static function userHighlightPrefs( &$user ) {
	return array(
		2,  // context lines
		75  // context characters
	);
}
00309
/**
 * Namespaces searched by default: the keys of
 * $wgNamespacesToBeSearchedDefault whose value loosely equals true.
 *
 * @return Array of namespace indexes
 */
public static function defaultNamespaces() {
	global $wgNamespacesToBeSearchedDefault;

	$enabled = array_keys( $wgNamespacesToBeSearchedDefault, true );
	return $enabled;
}
00320
/**
 * Turn namespace indexes into their formatted names. A namespace whose
 * formatted name is empty is shown as the 'blanknamespace' message.
 *
 * @param $namespaces Array of namespace indexes
 * @return Array of strings, with the keys of $namespaces preserved
 */
public static function namespacesAsText( $namespaces ) {
	global $wgContLang;

	$formatted = array();
	foreach ( $namespaces as $key => $ns ) {
		$formatted[$key] = $wgContLang->getFormattedNsText( $ns );
	}
	foreach ( $formatted as $key => $text ) {
		if ( empty( $text ) ) {
			$formatted[$key] = wfMsg( 'blanknamespace' );
		}
	}
	return $formatted;
}
00337
/**
 * Namespaces searched for help pages: the keys of
 * $wgNamespacesToBeSearchedHelp whose value loosely equals true.
 *
 * @return Array of namespace indexes
 */
public static function helpNamespaces() {
	global $wgNamespacesToBeSearchedHelp;

	$enabled = array_keys( $wgNamespacesToBeSearchedHelp, true );
	return $enabled;
}
00348
/**
 * Replace every character that is not a legal search character with a
 * space, then trim the result.
 *
 * @param $text String
 * @return String
 */
function filter( $text ) {
	// legalSearchChars() returns the inside of a character class.
	$illegal = '/[^' . $this->legalSearchChars() . ']/';
	$spaced = preg_replace( $illegal, " ", $text );
	return trim( $spaced );
}
/**
 * Instantiate the configured search engine.
 *
 * The class is $wgSearchType when set, otherwise whatever the database
 * object returned by wfGetDB( DB_SLAVE ) reports via getSearchEngine().
 * The new object gets that database object as its constructor argument
 * and has limit and offset both set to 0.
 *
 * @return SearchEngine (or whatever class is configured)
 */
public static function create() {
	global $wgSearchType;

	$dbr = wfGetDB( DB_SLAVE );
	$class = $wgSearchType ? $wgSearchType : $dbr->getSearchEngine();

	$search = new $class( $dbr );
	$search->setLimitOffset( 0, 0 );
	return $search;
}
00377
/**
 * Update the search index for a page.
 *
 * This implementation does nothing.
 * NOTE(review): presumably a hook for backends that keep an index - confirm.
 *
 * @param $id Integer
 * @param $title String
 * @param $text String
 */
function update( $id, $title, $text ) {
	// Intentionally empty.
}
00390
/**
 * Update only the title part of the search index for a page.
 *
 * This implementation does nothing.
 * NOTE(review): presumably a hook for backends that keep an index - confirm.
 *
 * @param $id Integer
 * @param $title String
 */
function updateTitle( $id, $title ) {
	// Intentionally empty.
}
00402
/**
 * Get the OpenSearch URL template.
 *
 * $wgOpenSearchTemplate wins when set. Otherwise the template points at
 * api.php on this server and lists the default search namespaces,
 * falling back to namespace "0" when that list is empty.
 *
 * @return String
 */
public static function getOpenSearchTemplate() {
	global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;

	if ( $wgOpenSearchTemplate ) {
		return $wgOpenSearchTemplate;
	}

	$ns = implode( '|', self::defaultNamespaces() );
	if ( !$ns ) {
		$ns = "0";
	}
	return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace=' . $ns;
}
00418
/**
 * Get the URL template used for MWSuggest.
 *
 * $wgMWSuggestTemplate wins when set; otherwise the template points at
 * api.php on this server with {searchTerms} and {namespaces} placeholders.
 *
 * @return String
 */
public static function getMWSuggestTemplate() {
	global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;

	if ( $wgMWSuggestTemplate ) {
		return $wgMWSuggestTemplate;
	}
	return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}&suggest';
}
00431 }
00432
/**
 * Base class for a set of search results.
 *
 * Every method here returns an "empty" answer (no rows, no suggestion,
 * no interwiki results); SqlSearchResultSet overrides several of them.
 */
class SearchResultSet {
	/**
	 * Patterns for the terms that were matched, for use in highlighting.
	 *
	 * @return Array: empty in this implementation
	 */
	function termMatches() {
		return array();
	}

	/**
	 * @return Integer: number of rows in this set; 0 in this implementation
	 */
	function numRows() {
		return 0;
	}

	/**
	 * @return Boolean: whether the set holds any results; false in this implementation
	 */
	function hasResults() {
		return false;
	}

	/**
	 * Total number of hits for the query.
	 *
	 * @return null in this implementation
	 *   NOTE(review): null presumably means "unknown" - confirm against callers
	 */
	function getTotalHits() {
		return null;
	}

	/**
	 * @return Boolean: whether a query suggestion is available; false in this implementation
	 */
	function hasSuggestion() {
		return false;
	}

	/**
	 * @return String|null: suggested query; null in this implementation
	 */
	function getSuggestionQuery() {
		return null;
	}

	/**
	 * @return String: snippet for the suggested query; empty in this implementation
	 */
	function getSuggestionSnippet() {
		return '';
	}

	/**
	 * @return String|null: extra information about the result set; null in this implementation
	 */
	function getInfo() {
		return null;
	}

	/**
	 * @return SearchResultSet|null: results from other wikis; null in this implementation
	 */
	function getInterwikiResults() {
		return null;
	}

	/**
	 * @return Boolean: true when getInterwikiResults() is not loosely equal to null
	 */
	function hasInterwikiResults() {
		$interwiki = $this->getInterwikiResults();
		return $interwiki != null;
	}

	/**
	 * Fetch the next result.
	 *
	 * @return SearchResult|false: false when there are no more results
	 *   (always false in this implementation)
	 */
	function next() {
		return false;
	}

	/**
	 * Release resources held by the result set. Nothing to do here.
	 */
	function free() {
		// Intentionally empty.
	}
}
00546
/**
 * Result set backed by a database result object.
 *
 * The result object may be false (next() has always allowed for that);
 * numRows() and free() now tolerate it as well instead of calling a
 * method on a boolean.
 */
class SqlSearchResultSet extends SearchResultSet {
	/** Database result object passed to the constructor, or false */
	var $mResultSet;

	/** Term patterns passed to the constructor; returned by termMatches() */
	var $mTerms;

	/**
	 * @param $resultSet Database result object providing numRows(),
	 *   fetchObject() and free(), or false
	 * @param $terms Array of term patterns
	 */
	function __construct( $resultSet, $terms ) {
		$this->mResultSet = $resultSet;
		$this->mTerms = $terms;
	}

	/**
	 * @return Array: the term patterns given to the constructor
	 */
	function termMatches() {
		return $this->mTerms;
	}

	/**
	 * @return Integer: row count of the result object, 0 when there is none
	 */
	function numRows() {
		if ( $this->mResultSet === false ) {
			return 0;
		}
		return $this->mResultSet->numRows();
	}

	/**
	 * Fetch the next row wrapped in a SearchResult.
	 *
	 * @return SearchResult|false: false when there is no result object
	 *   or no further row
	 */
	function next() {
		if ( $this->mResultSet === false ) {
			return false;
		}

		$row = $this->mResultSet->fetchObject();
		if ( $row === false ) {
			return false;
		}
		return new SearchResult( $row );
	}

	/**
	 * Free the underlying result object, if there is one.
	 */
	function free() {
		if ( $this->mResultSet === false ) {
			return;
		}
		$this->mResultSet->free();
	}
}
00578
/**
 * Marker class with no state or behaviour.
 * Some search engines may bail out if too many matches are found.
 */
class SearchResultTooMany {
}
00585
00586
/**
 * A single search result, built from a database row carrying
 * page_namespace and page_title.
 *
 * Several getters return fixed empty values here (score, title/redirect/
 * section snippets, interwiki prefix).
 */
class SearchResult {
	/** Revision object from Revision::newFromTitle(), or null */
	var $mRevision = null;

	/** File object from wfFindFile() for titles in NS_FILE, or null */
	var $mImage = null;

	/** Title built from the row; declared so reads never hit an undefined property */
	var $mTitle = null;

	/** Page text, loaded lazily by initText(); null until then */
	var $mText = null;

	/**
	 * @param $row Object with page_namespace and page_title fields
	 */
	function __construct( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		if ( !is_null( $this->mTitle ) ) {
			$this->mRevision = Revision::newFromTitle( $this->mTitle );
			if ( $this->mTitle->getNamespace() === NS_FILE ) {
				$this->mImage = wfFindFile( $this->mTitle );
			}
		}
	}

	/**
	 * @return Boolean: true when no title could be built for this result
	 */
	function isBrokenTitle() {
		return is_null( $this->mTitle );
	}

	/**
	 * @return Boolean: true when the result has neither a revision nor a file
	 */
	function isMissingRevision() {
		return !$this->mRevision && !$this->mImage;
	}

	/**
	 * @return Title|null
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * @return null: no relevance score in this implementation
	 */
	function getScore() {
		return null;
	}

	/**
	 * Load the page text on first use: the revision text when there is a
	 * revision, otherwise the empty string.
	 */
	protected function initText() {
		if ( !isset( $this->mText ) ) {
			if ( $this->mRevision != null ) {
				$this->mText = $this->mRevision->getText();
			} else {
				$this->mText = '';
			}
		}
	}

	/**
	 * Build the highlighted text snippet for this result.
	 *
	 * Uses SearchHighlighter::highlightText() when
	 * $wgAdvancedSearchHighlighting is set, highlightSimple() otherwise.
	 *
	 * @param $terms Array of term patterns to highlight
	 * @return String: highlighted snippet
	 */
	function getTextSnippet( $terms ) {
		global $wgUser, $wgAdvancedSearchHighlighting;
		$this->initText();
		list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
		$h = new SearchHighlighter();
		if ( $wgAdvancedSearchHighlighting ) {
			return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
		}
		return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
	}

	/**
	 * @param $terms Array of term patterns (unused here)
	 * @return String: highlighted title; empty in this implementation
	 */
	function getTitleSnippet( $terms ) {
		return '';
	}

	/**
	 * @param $terms Array of term patterns (unused here)
	 * @return String: highlighted redirect name; empty in this implementation
	 */
	function getRedirectSnippet( $terms ) {
		return '';
	}

	/**
	 * @return Title|null: redirect title; null in this implementation
	 */
	function getRedirectTitle() {
		return null;
	}

	/**
	 * @return String: highlighted section heading; empty in this implementation
	 */
	function getSectionSnippet() {
		return '';
	}

	/**
	 * @return Title|null: section title; null in this implementation
	 */
	function getSectionTitle() {
		return null;
	}

	/**
	 * @return String: timestamp of the revision, else of the file,
	 *   else the empty string
	 */
	function getTimestamp() {
		if ( $this->mRevision ) {
			return $this->mRevision->getTimestamp();
		} elseif ( $this->mImage ) {
			return $this->mImage->getTimestamp();
		}
		return '';
	}

	/**
	 * @return Integer: word count of the page text as given by str_word_count()
	 */
	function getWordCount() {
		$this->initText();
		return str_word_count( $this->mText );
	}

	/**
	 * @return Integer: size of the page text in bytes
	 */
	function getByteSize() {
		$this->initText();
		return strlen( $this->mText );
	}

	/**
	 * @return Boolean: whether related results are available; false in this implementation
	 */
	function hasRelated() {
		return false;
	}

	/**
	 * @return String: interwiki prefix of the result; empty in this implementation
	 */
	function getInterwikiPrefix() {
		return '';
	}
}
00746
00752 class SearchHighlighter {
00753 var $mCleanWikitext = true;
00754
/**
 * @param $cleanupWikitext Boolean: whether splitAndAdd() should strip
 *   wiki markup via removeWiki() before storing lines (default true)
 */
function __construct( $cleanupWikitext = true ) {
	$this->mCleanWikitext = $cleanupWikitext;
}

/**
 * Old PHP 4-style constructor name, kept as a plain method so existing
 * explicit calls keep working. PHP 8 no longer treats a method named
 * after the class as a constructor, hence __construct() above.
 *
 * @param $cleanupWikitext Boolean: see __construct()
 */
function SearchHighlighter( $cleanupWikitext = true ) {
	$this->__construct( $cleanupWikitext );
}
00758
/**
 * Build an excerpt of $text with the search terms highlighted.
 *
 * Steps:
 *  1. Split the text into plain-text lines and "other" lines (templates,
 *     links whose prefix resolves to NS_FILE, tables and, when the wfCite
 *     function exists, <ref> blocks).
 *  2. Pick up to $contextlines snippets: the first text line if it holds
 *     every term, otherwise phrase matches, then single-term matches,
 *     looking at text lines before other lines.
 *  3. Extend short snippets, join them with "<b> ... </b>" and wrap each
 *     term match in <span class='searchmatch'>.
 *
 * $contextchars is scaled by the bytes-per-character ratio of the terms,
 * because all lengths below are byte lengths.
 *
 * @param $text String: page text
 * @param $terms Array of regular-expression fragments; they are inserted
 *   into patterns as they are, without quoting
 * @param $contextlines Integer: maximum number of snippet lines
 * @param $contextchars Integer: characters of context per line
 * @return String: the excerpt (empty when $text is empty)
 */
public function highlightText( $text, $terms, $contextlines, $contextchars ) {
	global $wgContLang;
	global $wgSearchHighlightBoundaries;
	$fname = __METHOD__;

	if ( $text == '' ) {
		return '';
	}

	// Patterns opening a non-text block: template, prefixed link, table
	$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
	// For each opening group: pattern matching (1) a nested opener, (2) the closer
	$endPatterns = array(
		1 => '/(\{\{)|(\}\})/',
		2 => '/(\[\[)|(\]\])/',
		3 => "/(\n\\{\\|)|(\n\\|\\})/" );

	// Also treat references as non-text when the wfCite function is defined
	if ( function_exists( 'wfCite' ) ) {
		$spat .= '|(<ref>)';
		$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
	}
	$spat .= '/';
	$textExt = array(); // plain-text lines
	$otherExt = array(); // lines from non-text blocks
	wfProfileIn( "$fname-split" );
	$start = 0;
	$textLen = strlen( $text );
	$count = 0; // running line index shared by both arrays
	while ( $start < $textLen ) {
		// Find the start of the next non-text block
		if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
			$epat = '';
			foreach ( $matches as $key => $val ) {
				if ( $key > 0 && $val[1] != -1 ) {
					if ( $key == 2 ) {
						// Prefixed link: only file links count as non-text
						$ns = substr( $val[0], 2, -1 );
						if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
							break;
						}
					}
					$epat = $endPatterns[$key];
					$this->splitAndAdd( $textExt, $count, substr( $text, $start, $val[1] - $start ) );
					$start = $val[1];
					break;
				}
			}
			if ( $epat ) {
				// Find the matching end, keeping track of nesting
				$level = 0;
				$offset = $start + 1;
				$found = false;
				while ( preg_match( $epat, $text, $endMatches, PREG_OFFSET_CAPTURE, $offset ) ) {
					if ( array_key_exists( 2, $endMatches ) ) {
						// Closer found
						if ( $level == 0 ) {
							$len = strlen( $endMatches[2][0] );
							$off = $endMatches[2][1];
							$this->splitAndAdd( $otherExt, $count,
								substr( $text, $start, $off + $len - $start ) );
							$start = $off + $len;
							$found = true;
							break;
						} else {
							// End of a nested block
							$level -= 1;
						}
					} else {
						// Nested opener
						$level += 1;
					}
					$offset = $endMatches[0][1] + strlen( $endMatches[0][0] );
				}
				if ( !$found ) {
					// Unbalanced block: keep the opener as text and move past it
					$this->splitAndAdd( $textExt, $count, substr( $text, $start, strlen( $matches[0][0] ) ) );
					$start += strlen( $matches[0][0] );
				}
				continue;
			}
		}
		// No further non-text block: the rest is text
		$this->splitAndAdd( $textExt, $count, substr( $text, $start ) );
		break;
	}

	$all = $textExt + $otherExt; // both arrays use disjoint $count keys

	wfProfileOut( "$fname-split" );

	// Make terms with non-ASCII bytes match either case, character by character
	foreach ( $terms as $index => $term ) {
		if ( preg_match( '/[\x80-\xff]/', $term ) ) {
			$terms[$index] = preg_replace_callback( '/./us', array( $this, 'caseCallback' ), $terms[$index] );
		}
	}
	$anyterm = implode( '|', $terms );
	$phrase = implode( "$wgSearchHighlightBoundaries+", $terms );

	// Scale $contextchars from characters to bytes using the terms as a
	// sample. With no terms there is nothing to sample, so leave it as is
	// rather than dividing by zero.
	$anytermChars = mb_strlen( $anyterm );
	$scale = $anytermChars > 0 ? strlen( $anyterm ) / $anytermChars : 1;
	$contextchars = intval( $contextchars * $scale );

	$patPre = "(^|$wgSearchHighlightBoundaries)";
	$patPost = "($wgSearchHighlightBoundaries|$)";

	$pat1 = "/(" . $phrase . ")/ui";
	$pat2 = "/$patPre(" . $anyterm . ")$patPost/ui";

	wfProfileIn( "$fname-extract" );

	$left = $contextlines;

	$snippets = array();
	$offsets = array();

	// Take the start of the first text line that is not a ";" or ":" line
	$first = 0;
	$firstText = '';
	foreach ( $textExt as $index => $line ) {
		if ( strlen( $line ) > 0 && $line[0] != ';' && $line[0] != ':' ) {
			$firstText = $this->extract( $line, 0, $contextchars * $contextlines );
			$first = $index;
			break;
		}
	}
	if ( $firstText ) {
		$succ = true;
		// Use it only when it contains every term
		foreach ( $terms as $term ) {
			if ( !preg_match( "/$patPre" . $term . "$patPost/ui", $firstText ) ) {
				$succ = false;
				break;
			}
		}
		if ( $succ ) {
			$snippets[$first] = $firstText;
			$offsets[$first] = 0;
		}
	}
	if ( !$snippets ) {
		// Phrase matches first (text, then other), then single-term matches
		$this->process( $pat1, $textExt, $left, $contextchars, $snippets, $offsets );
		$this->process( $pat1, $otherExt, $left, $contextchars, $snippets, $offsets );
		$this->process( $pat2, $textExt, $left, $contextchars, $snippets, $offsets );
		$this->process( $pat2, $otherExt, $left, $contextchars, $snippets, $offsets );

		ksort( $snippets );
	}

	// Extend snippets that are too short
	$extended = array();
	if ( count( $snippets ) == 0 ) {
		// Nothing matched: show the beginning of the text
		$targetchars = $contextchars * $contextlines;
		$snippets[$first] = '';
		$offsets[$first] = 0;
	} else {
		// A phrase match near the start of the first line is enough on its own
		if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
			&& $offsets[$first] < $contextchars * 2 ) {
			$snippets = array( $first => $snippets[$first] );
		}

		// Split the character budget evenly between the snippets
		$targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
	}

	foreach ( $snippets as $index => $line ) {
		$extended[$index] = $line;
		$len = strlen( $line );
		if ( $len < $targetchars - 20 ) {
			// Take more of the same line when it has more to give.
			// isset() covers the case where no line survived splitting.
			if ( isset( $all[$index] ) && $len < strlen( $all[$index] ) ) {
				$extended[$index] = $this->extract( $all[$index], $offsets[$index], $offsets[$index] + $targetchars, $offsets[$index] );
				$len = strlen( $extended[$index] );
			}

			// Then append following lines that are not snippets themselves
			$add = $index + 1;
			while ( $len < $targetchars - 20
				&& array_key_exists( $add, $all )
				&& !array_key_exists( $add, $snippets ) ) {
				// Must be non-null for extract() to write the position back
				$offsets[$add] = 0;
				$tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
				$extended[$add] = $tt;
				$len += strlen( $tt );
				$add++;
			}
		}
	}

	// Join the pieces, marking gaps between them
	$snippets = $extended;
	$last = -1;
	$extract = '';
	foreach ( $snippets as $index => $line ) {
		if ( $last == -1 ) {
			$extract .= $line; // first piece
		} elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) {
			$extract .= " " . $line; // continues straight on from the previous line
		} else {
			$extract .= '<b> ... </b>' . $line;
		}

		$last = $index;
	}
	if ( $extract ) {
		$extract .= '<b> ... </b>';
	}

	// Wrap every term match, once per distinct term
	$processed = array();
	foreach ( $terms as $term ) {
		if ( !isset( $processed[$term] ) ) {
			$pat3 = "/$patPre(" . $term . ")$patPost/ui";
			$extract = preg_replace( $pat3,
				"\\1<span class='searchmatch'>\\2</span>\\3", $extract );
			$processed[$term] = true;
		}
	}

	wfProfileOut( "$fname-extract" );

	return $extract;
}
01000
/**
 * Split text into lines and append every non-blank, trimmed line to
 * $extracts, using $count as the next key.
 *
 * Wiki markup is stripped first via removeWiki() when mCleanWikitext is set.
 * A line is blank only when it is empty after trimming; a line reading
 * just "0" is kept (a plain truthiness test would drop it).
 *
 * @param $extracts Array: receives the lines (by reference)
 * @param $count Integer: next key to use; incremented per added line (by reference)
 * @param $text String
 */
function splitAndAdd( &$extracts, &$count, $text ) {
	$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
	foreach ( $split as $line ) {
		$tt = trim( $line );
		if ( $tt !== '' ) {
			$extracts[$count++] = $tt;
		}
	}
}
01016
/**
 * preg_replace_callback() handler used on search terms: turns a
 * multi-byte character into a character class holding its lower- and
 * upper-case forms. Single-byte characters are returned unchanged.
 *
 * @param $matches Array: match data; only element 0 is used
 * @return String
 */
function caseCallback( $matches ) {
	global $wgContLang;

	$char = $matches[0];
	if ( strlen( $char ) <= 1 ) {
		return $char;
	}
	return '[' . $wgContLang->lc( $char ) . $wgContLang->uc( $char ) . ']';
}
01029
/**
 * Cut a piece out of $text between two byte offsets, after moving both
 * ends with position().
 *
 * A start of 0 is used as is; an end at or beyond the text length is
 * clamped to the text length. The adjusted offsets are written back to
 * $posStart / $posEnd only when the caller passed a non-null value.
 *
 * @param $text String
 * @param $start Integer: requested start offset
 * @param $end Integer: requested end offset
 * @param $posStart Integer: receives the adjusted start (by reference)
 * @param $posEnd Integer: receives the adjusted end (by reference)
 * @return String: the piece, or '' when the adjusted end is not after the start
 */
function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
	$textLen = strlen( $text );

	if ( $start != 0 ) {
		$start = $this->position( $text, $start, 1 );
	}
	$end = ( $end >= $textLen ) ? $textLen : $this->position( $text, $end );

	if ( !is_null( $posStart ) ) {
		$posStart = $start;
	}
	if ( !is_null( $posEnd ) ) {
		$posEnd = $end;
	}

	return ( $end > $start ) ? substr( $text, $start, $end - $start ) : '';
}
01060
/**
 * Find a good place to cut $text near byte offset $point.
 *
 * Looks for the first space or punctuation character in a window of 10
 * bytes either side of $point and returns its offset plus $offset.
 * Without one, $point is moved forward past any UTF-8 continuation bytes
 * (0x80-0xBF) so a multi-byte character is not split.
 *
 * A $point at or beyond the end of the text yields the text length
 * instead of reading past the end of the string.
 *
 * @param $text String
 * @param $point Integer: byte offset to cut near
 * @param $offset Integer: added to the offset of a boundary character (default 0)
 * @return Integer: byte offset
 */
function position( $text, $point, $offset = 0 ) {
	$tolerance = 10;
	$textLen = strlen( $text );
	$s = max( 0, $point - $tolerance );
	$l = min( $textLen, $point + $tolerance ) - $s;
	$m = array();
	if ( preg_match( '/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', substr( $text, $s, $l ), $m, PREG_OFFSET_CAPTURE ) ) {
		return $m[0][1] + $s + $offset;
	}

	// No boundary nearby
	if ( $point >= $textLen ) {
		return $textLen;
	}

	// Do not land in the middle of a multi-byte character
	$char = ord( $text[$point] );
	while ( $char >= 0x80 && $char < 0xc0 ) {
		// Continuation byte: skip forward
		$point++;
		if ( $point >= $textLen ) {
			return $textLen;
		}
		$char = ord( $text[$point] );
	}
	return $point;
}
01090
/**
 * Search lines for a pattern and store a snippet for each line that
 * matches, until the line budget is used up.
 *
 * Lines that already have a snippet in $out are skipped. The snippet
 * window is $contextchars bytes long: it starts at 0 when the match ends
 * before $contextchars, at the match when the match is longer than the
 * window, and is otherwise centred on the match.
 *
 * @param $pattern String: regular expression
 * @param $extracts Array: index => line
 * @param $linesleft Integer: snippets still allowed; decremented (by reference)
 * @param $contextchars Integer: window length in bytes (by reference, not modified)
 * @param $out Array: index => snippet (by reference)
 * @param $offsets Array: index => start offset of the snippet in its line (by reference)
 */
function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
	if ( $linesleft == 0 ) {
		return; // nothing to do
	}

	foreach ( $extracts as $index => $line ) {
		if ( array_key_exists( $index, $out ) ) {
			continue; // already has a snippet
		}

		$m = array();
		if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
			continue;
		}

		$matchPos = $m[0][1];
		$matchLen = strlen( $m[0][0] );
		if ( $matchPos + $matchLen < $contextchars ) {
			$from = 0;
		} elseif ( $matchLen > $contextchars ) {
			$from = $matchPos;
		} else {
			$from = $matchPos + intval( ( $matchLen - $contextchars ) / 2 );
		}
		$to = $from + $contextchars;

		// Non-null so that extract() writes the adjusted start back
		$adjustedFrom = $from;
		$out[$index] = $this->extract( $line, $from, $to, $adjustedFrom );
		$offsets[$index] = $adjustedFrom;

		$linesleft--;
		if ( $linesleft == 0 ) {
			return;
		}
	}
}
01133
/**
 * Strip basic wiki markup from text.
 *
 * In order: templates without a pipe are removed, templates with a pipe
 * keep what follows the first pipe, unpiped links keep their target,
 * piped links go through linkReplace(), then tags and bold/italic quote
 * markup are removed.
 *
 * @param $text String
 * @return String
 */
function removeWiki( $text ) {
	$fname = __METHOD__;
	wfProfileIn( $fname );

	// Templates and unpiped links; patterns are applied one after another
	$text = preg_replace(
		array(
			"/\\{\\{([^|]+?)\\}\\}/",
			"/\\{\\{([^|]+\\|)(.*?)\\}\\}/",
			"/\\[\\[([^|]+?)\\]\\]/",
		),
		array(
			"",
			"\\2",
			"\\1",
		),
		$text
	);

	// Piped links
	$text = preg_replace_callback( "/\\[\\[([^|]+\\|)(.*?)\\]\\]/", array( $this, 'linkReplace' ), $text );

	// Tags, then ''''' / ''' / '' quote markup
	$text = preg_replace(
		array(
			"/<\/?[^>]+>/",
			"/'''''/",
			"/('''|<\/?[iIuUbB]>)/",
			"/''/",
		),
		"",
		$text
	);

	wfProfileOut( $fname );
	return $text;
}
01161
/**
 * preg_replace_callback() handler for piped links in removeWiki().
 *
 * Links whose prefix resolves to NS_FILE or NS_CATEGORY are left as they
 * are; every other piped link is replaced by its caption.
 *
 * @param $matches Array: 0 = whole link, 1 = target including the pipe, 2 = caption
 * @return String
 */
function linkReplace( $matches ) {
	global $wgContLang;

	$colon = strpos( $matches[1], ':' );
	if ( $colon === false ) {
		return $matches[2]; // no prefix at all: keep the caption
	}

	$prefix = substr( $matches[1], 0, $colon );
	$index = $wgContLang->getNsIndex( $prefix );
	$keepLink = $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY );

	return $keepLink ? $matches[0] : $matches[2];
}
01181
/**
 * Simple line-based highlighter.
 *
 * Goes through the text line by line and, for up to $contextlines lines
 * that contain a term, emits the match with up to $contextchars of
 * context either side, HTML-escaped, with every term occurrence wrapped
 * in <span class='searchmatch'>.
 *
 * @param $text String
 * @param $terms Array of regular-expression fragments, used unquoted
 * @param $contextlines Integer: maximum number of lines to emit
 * @param $contextchars Integer: context length either side of the match
 * @return String: the emitted lines, each followed by a newline
 */
public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
	global $wgContLang;
	$fname = __METHOD__;

	$lines = explode( "\n", $text );

	$terms = implode( '|', $terms );
	$max = intval( $contextchars ) + 1;
	$pat1 = "/(.*)($terms)(.{0,$max})/i";
	$pat2 = '/(' . $terms . ")/i";

	$extract = "";
	wfProfileIn( "$fname-extract" );
	foreach ( $lines as $line ) {
		if ( 0 == $contextlines ) {
			break; // line budget used up
		}
		$m = array();
		if ( !preg_match( $pat1, $line, $m ) ) {
			continue;
		}
		--$contextlines;

		// A negative length makes truncate() work on the end of the string
		$pre = $wgContLang->truncate( $m[1], -$contextchars );
		$post = ( count( $m ) < 3 ) ? '' : $wgContLang->truncate( $m[3], $contextchars );
		$found = $m[2];

		$line = htmlspecialchars( $pre . $found . $post );
		$line = preg_replace( $pat2,
			"<span class='searchmatch'>\\1</span>", $line );

		$extract .= $line . "\n";
	}
	wfProfileOut( "$fname-extract" );

	return $extract;
}
01237
01238 }
01239
/**
 * Search engine that adds nothing to SearchEngine: every method is the
 * inherited one, so searches return null.
 */
class SearchEngineDummy extends SearchEngine {
}