<?php

/**
 * One family of multi-language wikis (for example, all the Wikipedias).
 */
class Site {
	/** Database name suffix shared by the family's wikis, e.g. "wiktionary" */
	public $suffix;

	/** Interwiki prefix used to link to this family from other families, e.g. "wikt" */
	public $lateral;

	/** Hostname of the family without the language subdomain, e.g. "wiktionary.org" */
	public $url;

	/**
	 * @param string $s Database name suffix
	 * @param string $l Lateral (cross-family) interwiki prefix
	 * @param string $u Hostname without the language subdomain
	 */
	public function __construct( $s, $l, $u ) {
		$this->suffix = $s;
		$this->lateral = $l;
		$this->url = $u;
	}

	/**
	 * Build the interwiki URL pattern for one language edition of this family.
	 *
	 * Underscores in the language code are turned into hyphens to form the
	 * subdomain; the returned string ends in the literal placeholder "$1".
	 *
	 * @param string $lang Language code
	 * @return string
	 */
	public function getURL( $lang ) {
		$subdomain = str_replace( '_', '-', $lang );
		return 'http://' . $subdomain . '.' . $this->url . '/wiki/$1';
	}
}

/**
 * Read a list file (one entry per line) into an array.
 *
 * Every line is trimmed and blank lines are dropped, so a stray empty line
 * cannot turn into an empty language prefix or an empty database name.
 * Calls wfDie() when the file cannot be read.
 *
 * @param string $path File to read
 * @return array List of non-empty, trimmed entries in file order
 */
function readInterwikiListFile( $path ) {
	$rawLines = file( $path );
	if ( $rawLines === false ) {
		wfDie( "Unable to read $path" );
	}

	$entries = array();
	foreach ( $rawLines as $rawLine ) {
		$entry = trim( $rawLine );
		if ( $entry !== '' ) {
			$entries[] = $entry;
		}
	}
	return $entries;
}

/**
 * Turn the wikitext rows of the interwiki map into interwiki table rows.
 *
 * A row is recognised when it looks like "| prefix || http(s)://url". The
 * prefix is lower-cased; rows whose prefix is reserved are skipped. A row is
 * flagged local (iw_local = 1) when its URL contains one of $localHosts.
 *
 * @param array $lines Lines of the interwiki map page
 * @param array $reserved Map of reserved prefix => non-empty value
 * @param array $localHosts Hostnames whose links count as local
 * @return array Map of prefix => array( iw_prefix, iw_url, iw_local )
 */
function parseInterwikiMapLines( $lines, $reserved, $localHosts ) {
	$quotedHosts = array();
	foreach ( $localHosts as $host ) {
		$quotedHosts[] = preg_quote( $host, '/' );
	}
	# Unanchored on purpose: the host may appear behind any subdomain
	$localPattern = $quotedHosts ? '/(' . implode( '|', $quotedHosts ) . ')/' : false;

	$rows = array();
	foreach ( $lines as $line ) {
		$matches = array();
		if ( !preg_match( '/^\|\s*(.*?)\s*\|\|\s*(https?:\/\/.*?)\s*$/', trim( $line ), $matches ) ) {
			continue;
		}

		$prefix = strtolower( $matches[1] );
		if ( !empty( $reserved[$prefix] ) ) {
			continue;
		}

		$url = $matches[2];
		$local = ( $localPattern !== false && preg_match( $localPattern, $url ) ) ? 1 : 0;
		$rows[$prefix] = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
	}
	return $rows;
}

/**
 * Write one SQL file per database that truncates and refills its interwiki table.
 *
 * The database and language lists are read from fixed files, the interwiki
 * map is fetched from meta, and the result for each database is written to
 * "$destDir/<dbname>.sql". Also fills the globals $langlist,
 * $languageAliases and $prefixRewrites, which makeLanguageLinks() and
 * makeLink() read.
 *
 * @param string $destDir Directory that receives the generated .sql files
 */
function makeInterwikiSQL( $destDir ) {
	global $langlist, $languageAliases, $prefixRewrites;

	# Multi-language sites
	# db suffix => db suffix, iw prefix, hostname
	$sites = array(
		'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
		'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
		'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
		'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
		'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
		'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
		'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
		'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
	);

	# List of language prefixes likely to be found in multi-language sites
	$langlist = readInterwikiListFile( "/home/wikipedia/common/langlist" );

	# List of all database names
	$dblist = readInterwikiListFile( "/home/wikipedia/common/all.dblist" );

	# Special-case hostnames
	$specials = array(
		'sourceswiki' => 'sources.wikipedia.org',
		'quotewiki' => 'wikiquote.org',
		'textbookwiki' => 'wikibooks.org',
		'sep11wiki' => 'sep11.wikipedia.org',
		'metawiki' => 'meta.wikimedia.org',
		'commonswiki' => 'commons.wikimedia.org',
		'specieswiki' => 'species.wikimedia.org',
	);

	# Extra interwiki links that can't be in the intermap for some reason
	$extraLinks = array(
		array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
		array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
		array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
	);

	# Language aliases, usually configured as redirects to the real wiki in apache.
	# Interlanguage links are made directly to the real wiki.
	# NOTE: every alias must be listed here; the original author warns that
	# forgetting one breaks something, without recording what.
	$languageAliases = array(
		'zh-cn' => 'zh',
		'zh-tw' => 'zh',
		'dk' => 'da',
		'nb' => 'no',
	);

	# Special case prefix rewrites, for the benefit of Swedish which uses s:t
	# as an abbreviation for saint
	$prefixRewrites = array(
		'svwiki' => array( 's' => 'src' ),
	);

	# Prefixes the intermap is not allowed to override: language codes,
	# language aliases and the lateral prefixes of the multi-language sites.
	# The site hostnames are collected in the same pass so that the "local"
	# flag covers every project in $sites.
	$reserved = array();
	foreach ( $langlist as $lang ) {
		$reserved[$lang] = 1;
	}
	foreach ( $languageAliases as $alias => $lang ) {
		$reserved[$alias] = 1;
	}
	$localHosts = array();
	foreach ( $sites as $knownSite ) {
		$reserved[$knownSite->lateral] = 1;
		$localHosts[] = $knownSite->url;
	}

	# Extract the intermap from meta
	$intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
	$lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );

	if ( !$lines || count( $lines ) < 2 ) {
		wfDie( "m:Interwiki_map not found" );
	}

	$iwArray = parseInterwikiMapLines( $lines, $reserved, $localHosts );

	foreach ( $dblist as $db ) {
		$sql = "-- Generated by rebuildInterwiki.php";
		# makeLink() flips this to false after the first row so that later
		# rows are preceded by a comma
		$first = true;

		if ( isset( $specials[$db] ) ) {
			# Special wiki
			# Has interwiki links and interlanguage links to wikipedia
			$host = $specials[$db];
			$sql .= "\n--$host\n\n";
			$sql .= "USE $db;\n" .
				"TRUNCATE TABLE interwiki;\n" .
				"INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES \n";

			# Intermap links
			foreach ( $iwArray as $iwEntry ) {
				$sql .= makeLink( $iwEntry, $first, $db );
			}

			# Links to multilanguage sites
			foreach ( $sites as $targetSite ) {
				$sql .= makeLink( array( $targetSite->lateral, $targetSite->getURL( 'en' ), 1 ), $first, $db );
			}

			# Interlanguage links to wikipedia
			$sql .= makeLanguageLinks( $sites['wiki'], $first, $db );

			# Extra links
			foreach ( $extraLinks as $link ) {
				$sql .= makeLink( $link, $first, $db );
			}

			$sql .= ";\n";
		} else {
			# Find out which site this DB belongs to: the first site whose
			# suffix ends the database name wins, the rest is the language
			$site = false;
			$lang = '';
			foreach ( $sites as $candidateSite ) {
				$matches = array();
				if ( preg_match( '/(.*)' . preg_quote( $candidateSite->suffix, '/' ) . '$/', $db, $matches ) ) {
					$site = $candidateSite;
					$lang = $matches[1];
					break;
				}
			}
			if ( !$site ) {
				print "Invalid database $db\n";
				continue;
			}

			$host = "$lang." . $site->url;
			$sql .= "\n--$host\n\n";
			$sql .= "USE $db;\n" .
				"TRUNCATE TABLE interwiki;\n" .
				"INSERT INTO interwiki (iw_prefix,iw_url,iw_local) VALUES\n";

			# Intermap links
			# Suppress links with the same name as the site; for the "wiki"
			# family that name is "wikipedia"
			$ownPrefix = ( $site->suffix === 'wiki' ) ? 'wikipedia' : $site->suffix;
			foreach ( $iwArray as $iwEntry ) {
				if ( $iwEntry['iw_prefix'] !== $ownPrefix ) {
					$sql .= makeLink( $iwEntry, $first, $db );
				}
			}

			# Lateral links
			foreach ( $sites as $targetSite ) {
				# Suppress link to self
				if ( $targetSite->suffix != $site->suffix ) {
					$sql .= makeLink( array( $targetSite->lateral, $targetSite->getURL( $lang ), 1 ), $first, $db );
				}
			}

			# Interlanguage links
			$sql .= makeLanguageLinks( $site, $first, $db );

			# w link within wikipedias
			# Other sites already have it as a lateral link
			if ( $site->suffix == "wiki" ) {
				$sql .= makeLink( array( "w", 'http://en.wikipedia.org/wiki/$1', 1 ), $first, $db );
			}

			# Extra links
			foreach ( $extraLinks as $link ) {
				$sql .= makeLink( $link, $first, $db );
			}
			$sql .= ";\n";
		}

		# Report a failed write instead of dropping it silently, but keep
		# going so the remaining databases still get their files
		$target = "$destDir/$db.sql";
		if ( file_put_contents( $target, $sql ) === false ) {
			print "Unable to write $target\n";
		}
	}
}

# ------------------------------------------------------------------------------------------

/**
 * Returns part of an INSERT statement, corresponding to all interlanguage
 * links to a particular site.
 *
 * Emits one row per entry of the global $langlist, followed by one row per
 * entry of the global $languageAliases pointing at the aliased language.
 *
 * @param Site $site Site whose language editions are linked to
 * @param bool $first Comma-tracking flag shared with makeLink(); updated in place
 * @param string $source Database name of the wiki the rows are generated for
 * @return string
 */
function makeLanguageLinks( &$site, &$first, $source ) {
	global $langlist, $languageAliases;

	$rows = "";

	# Actual languages with their own databases
	foreach ( $langlist as $targetLang ) {
		$rows .= makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $first, $source );
	}

	# Language aliases
	foreach ( $languageAliases as $alias => $realLang ) {
		$rows .= makeLink( array( $alias, $site->getURL( $realLang ), 1 ), $first, $source );
	}

	return $rows;
}

/**
 * Make SQL for a single link from an array.
 *
 * Returns one parenthesised value tuple, preceded by ",\n" unless it is the
 * first tuple of the statement. When the global $prefixRewrites has an entry
 * for $source that covers this link's prefix, the prefix is replaced first.
 *
 * @param array $entry Row values with the prefix as first element; either a
 *   plain list (prefix, url, local) or an array keyed by column name
 * @param bool $first True while no tuple has been emitted yet; set to false here
 * @param string $source Database name of the wiki the row is generated for
 * @return string
 */
function makeLink( $entry, &$first, $source ) {
	global $prefixRewrites;

	# Callers pass both positional lists and rows keyed by column name, so
	# find the prefix by position instead of assuming that index 0 exists.
	# Reading $entry[0] on a keyed row raised an undefined-offset notice and
	# kept the rewrite from ever applying to it.
	reset( $entry );
	$prefixKey = key( $entry );
	if ( $prefixKey !== null
		&& isset( $prefixRewrites[$source] )
		&& isset( $prefixRewrites[$source][$entry[$prefixKey]] )
	) {
		$entry[$prefixKey] = $prefixRewrites[$source][$entry[$prefixKey]];
	}

	$sql = "";
	# Add comma
	if ( $first ) {
		$first = false;
	} else {
		$sql .= ",\n";
	}

	# The database object is used here to turn the values into a list for
	# the tuple
	$dbr = wfGetDB( DB_SLAVE );
	$sql .= "(" . $dbr->makeList( $entry ) . ")";
	return $sql;
}