00001 <?php
/**
 * Pseudo-namespace key under which GenerateSitemap stores the fallback
 * priority for namespaces that MWNamespace::isMain() accepts but that have
 * no explicit priority of their own.
 */
define( 'GS_MAIN', -2 );

/**
 * Pseudo-namespace key under which GenerateSitemap stores the fallback
 * priority for every other namespace without an explicit priority.
 */
define( 'GS_TALK', -1 );
00034 require_once( dirname(__FILE__) . '/Maintenance.php' );
00035
/**
 * Maintenance script that writes sitemap files for the wiki.
 *
 * One or more sitemap files are written per namespace, and every file that
 * is created is listed in a single sitemap index file. Sitemap files are
 * optionally gzip-compressed; the index file is always written uncompressed.
 */
class GenerateSitemap extends Maintenance {
	/**
	 * Maximum number of URLs written to one sitemap file.
	 * Set to 50000 by execute().
	 *
	 * @var int
	 */
	public $url_limit;

	/**
	 * Maximum number of (uncompressed) bytes written to one sitemap file.
	 * Set to 10 * 2^20 by execute().
	 *
	 * @var int
	 */
	public $size_limit;

	/**
	 * Directory the files are written to, ending in a directory separator.
	 *
	 * @var string
	 */
	public $fspath;

	/**
	 * URL prefix put in front of each sitemap file name in the index file.
	 * Either empty or ending in a slash.
	 *
	 * @var string
	 */
	public $urlpath = '';

	/**
	 * Not read or written anywhere in this class; kept so that external
	 * code referring to the property keeps working.
	 *
	 * @var string
	 */
	public $path;

	/**
	 * Whether sitemap files are written through the gz* functions.
	 *
	 * @var bool
	 */
	public $compress;

	/**
	 * Byte lengths computed by generateLimit():
	 * [0] the file header, [1] a reference entry, [2] the file footer.
	 *
	 * @var array
	 */
	public $limit = array();

	/**
	 * Map of namespace id (or GS_MAIN / GS_TALK) to priority string.
	 *
	 * @var array
	 */
	public $priorities = array();

	/**
	 * Namespace ids to generate sitemaps for.
	 *
	 * @var array
	 */
	public $namespaces = array();

	/**
	 * ISO 8601 timestamp of this run, used as <lastmod> in the index file.
	 *
	 * @var string
	 */
	public $timestamp;

	/**
	 * Database handle obtained with wfGetDB( DB_SLAVE ).
	 *
	 * @var object
	 */
	public $dbr;

	/**
	 * Handle of the sitemap index file.
	 *
	 * @var resource
	 */
	public $findex;

	/**
	 * Handle of the sitemap file currently being written, or false when no
	 * file is open.
	 *
	 * @var resource|bool
	 */
	public $file;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Creates a sitemap for the site";
		$this->addOption( 'fspath', 'The file system path to save to, e.g. /tmp/sitemap' .
			"\n\t\tdefaults to current directory", false, true );
		$this->addOption( 'urlpath', 'The URL path corresponding to --fspath, prepended to' .
			"\n\t\tfilenames in the index; defaults to an empty string", false, true );
		$this->addOption( 'compress', 'Compress the sitemap files, can take value yes|no, default yes', false, true );
	}

	/**
	 * Read the options, open the index file and generate all sitemaps.
	 *
	 * Terminates the script if the index file cannot be opened.
	 */
	public function execute() {
		$this->setNamespacePriorities();
		$this->url_limit = 50000;
		$this->size_limit = pow( 2, 20 ) * 10;
		$this->fspath = self::init_path( $this->getOption( 'fspath', getcwd() ) );

		// Normalise the URL prefix so it can be glued directly to a file name.
		$this->urlpath = $this->getOption( 'urlpath', '' );
		if ( $this->urlpath !== '' && substr( $this->urlpath, -1 ) !== '/' ) {
			$this->urlpath .= '/';
		}

		$this->compress = $this->getOption( 'compress', 'yes' ) !== 'no';
		$this->dbr = wfGetDB( DB_SLAVE );
		$this->generateNamespaces();
		$this->timestamp = wfTimestamp( TS_ISO_8601, wfTimestampNow() );

		$indexPath = "{$this->fspath}sitemap-index-" . wfWikiID() . ".xml";
		$this->findex = fopen( $indexPath, 'wb' );
		if ( $this->findex === false ) {
			// Without this check every later fwrite() would operate on false.
			die( "Can not open $indexPath for writing.\n" );
		}

		$this->main();
	}

	/**
	 * Fill $this->priorities with the two fallback priorities and the
	 * explicit priorities of the built-in namespaces.
	 */
	private function setNamespacePriorities() {
		// Fallbacks used by guessPriority()
		$this->priorities[GS_MAIN] = '0.5';
		$this->priorities[GS_TALK] = '0.1';

		// Explicit per-namespace priorities
		$this->priorities[NS_MAIN] = '1.0';
		$this->priorities[NS_TALK] = '0.1';
		$this->priorities[NS_USER] = '0.5';
		$this->priorities[NS_USER_TALK] = '0.1';
		$this->priorities[NS_PROJECT] = '0.5';
		$this->priorities[NS_PROJECT_TALK] = '0.1';
		$this->priorities[NS_FILE] = '0.5';
		$this->priorities[NS_FILE_TALK] = '0.1';
		$this->priorities[NS_MEDIAWIKI] = '0.0';
		$this->priorities[NS_MEDIAWIKI_TALK] = '0.1';
		$this->priorities[NS_TEMPLATE] = '0.0';
		$this->priorities[NS_TEMPLATE_TALK] = '0.1';
		$this->priorities[NS_HELP] = '0.5';
		$this->priorities[NS_HELP_TALK] = '0.1';
		$this->priorities[NS_CATEGORY] = '0.5';
		$this->priorities[NS_CATEGORY_TALK] = '0.1';
	}

	/**
	 * Create the target directory if needed and return its resolved path.
	 *
	 * Terminates the script if the directory cannot be created or resolved.
	 *
	 * @param $fspath String: directory to write to
	 * @return String|null resolved path followed by a directory separator,
	 *   or null when $fspath is not set
	 */
	private static function init_path( $fspath ) {
		if ( !isset( $fspath ) ) {
			return null;
		}
		# Create directory if needed
		if ( $fspath && !is_dir( $fspath ) ) {
			wfMkdirParents( $fspath ) or die( "Can not create directory $fspath.\n" );
		}

		$resolved = realpath( $fspath );
		if ( $resolved === false ) {
			// false . DIRECTORY_SEPARATOR would silently turn into the
			// file system root, so stop instead.
			die( "Can not resolve directory $fspath.\n" );
		}

		return $resolved . DIRECTORY_SEPARATOR;
	}

	/**
	 * Fill $this->namespaces, either from $wgSitemapNamespaces (when it is
	 * an array) or with every namespace that occurs in the page table.
	 */
	public function generateNamespaces() {
		global $wgSitemapNamespaces;

		// An explicit configuration takes precedence over the database.
		if ( is_array( $wgSitemapNamespaces ) ) {
			$this->namespaces = $wgSitemapNamespaces;
			return;
		}

		$res = $this->dbr->select( 'page',
			array( 'page_namespace' ),
			array(),
			__METHOD__,
			array(
				'GROUP BY' => 'page_namespace',
				'ORDER BY' => 'page_namespace',
			)
		);

		foreach ( $res as $row ) {
			$this->namespaces[] = $row->page_namespace;
		}
	}

	/**
	 * Get the priority of a namespace.
	 *
	 * @param $namespace Integer: namespace id
	 * @return String the explicit priority if one is set, otherwise the
	 *   result of guessPriority()
	 */
	public function priority( $namespace ) {
		return isset( $this->priorities[$namespace] )
			? $this->priorities[$namespace]
			: $this->guessPriority( $namespace );
	}

	/**
	 * Pick a fallback priority for a namespace without an explicit one.
	 *
	 * @param $namespace Integer: namespace id
	 * @return String the GS_MAIN priority when MWNamespace::isMain() is
	 *   true for the namespace, the GS_TALK priority otherwise
	 */
	public function guessPriority( $namespace ) {
		return MWNamespace::isMain( $namespace )
			? $this->priorities[GS_MAIN]
			: $this->priorities[GS_TALK];
	}

	/**
	 * Select namespace, title and touched timestamp of every page in a
	 * namespace.
	 *
	 * @param $namespace Integer: namespace id
	 * @return the result of the select() call on $this->dbr
	 */
	public function getPageRes( $namespace ) {
		return $this->dbr->select( 'page',
			array(
				'page_namespace',
				'page_title',
				'page_touched',
			),
			array( 'page_namespace' => $namespace ),
			__METHOD__
		);
	}

	/**
	 * Write the index file and the sitemap files of every namespace.
	 *
	 * A new sitemap file is started for the first URL of a namespace and
	 * whenever the next URL would exceed $url_limit or $size_limit. Every
	 * written <url> entry, including language variant entries, counts
	 * towards both limits.
	 */
	public function main() {
		global $wgContLang;

		fwrite( $this->findex, $this->openIndex() );

		// The variant codes do not depend on the page, so collect them once.
		$variantCodes = array();
		if ( $wgContLang->hasVariants() ) {
			$contentCode = $wgContLang->getCode();
			foreach ( $wgContLang->getVariants() as $vCode ) {
				if ( $vCode == $contentCode ) {
					continue;
				}
				$variantCodes[] = $vCode;
			}
		}

		foreach ( $this->namespaces as $namespace ) {
			$res = $this->getPageRes( $namespace );
			$this->file = false;
			$this->generateLimit( $namespace );
			$priority = $this->priority( $namespace );
			$length = $this->limit[0];
			$urlCount = 0;
			$smcount = 0;

			$fns = $wgContLang->getFormattedNsText( $namespace );
			// Newline terminated so the file names listed below (and the
			// headers of namespaces without pages) start on their own line.
			$this->output( "$namespace ($fns)\n" );

			foreach ( $res as $row ) {
				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$date = wfTimestamp( TS_ISO_8601, $row->page_touched );

				$entries = array( $this->fileEntry( $title->getFullURL(), $date, $priority ) );
				foreach ( $variantCodes as $vCode ) {
					$entries[] = $this->fileEntry( $title->getFullURL( '', $vCode ), $date, $priority );
				}

				foreach ( $entries as $entry ) {
					$entryLength = strlen( $entry );
					// The size test is skipped for an empty file: rotating
					// would only produce another empty file.
					$needsNewFile = $this->file === false
						|| $urlCount >= $this->url_limit
						|| ( $urlCount > 0
							&& $length + $entryLength + $this->limit[2] > $this->size_limit );

					if ( $needsNewFile ) {
						$this->endSitemapFile();
						$this->beginSitemapFile( $namespace, $smcount++ );
						$length = $this->limit[0];
						$urlCount = 0;
					}

					$this->write( $this->file, $entry );
					$length += $entryLength;
					$urlCount++;
				}
			}

			$this->endSitemapFile();
		}

		fwrite( $this->findex, $this->closeIndex() );
		fclose( $this->findex );
	}

	/**
	 * Open the next sitemap file of a namespace, write its header, add it
	 * to the index file and report its path.
	 *
	 * Terminates the script if the file cannot be opened.
	 *
	 * @param $namespace Integer: namespace id
	 * @param $count Integer: sequence number of the file in the namespace
	 */
	private function beginSitemapFile( $namespace, $count ) {
		$filename = $this->sitemapFilename( $namespace, $count );
		$this->file = $this->open( $this->fspath . $filename, 'wb' );
		if ( $this->file === false ) {
			die( "Can not open {$this->fspath}$filename for writing.\n" );
		}
		$this->write( $this->file, $this->openFile() );
		fwrite( $this->findex, $this->indexEntry( $filename ) );
		$this->output( "\t{$this->fspath}$filename\n" );
	}

	/**
	 * Write the footer of the current sitemap file and close it.
	 * Does nothing when no sitemap file is open.
	 */
	private function endSitemapFile() {
		if ( $this->file === false ) {
			return;
		}
		$this->write( $this->file, $this->closeFile() );
		$this->close( $this->file );
		$this->file = false;
	}

	/**
	 * Open a file with gzopen() or fopen(), depending on $this->compress.
	 *
	 * @param $file String: path of the file
	 * @param $flags String: open mode
	 * @return resource|bool file handle, or false on failure
	 */
	public function open( $file, $flags ) {
		return $this->compress ? gzopen( $file, $flags ) : fopen( $file, $flags );
	}

	/**
	 * Write a string with gzwrite() or fwrite(), depending on
	 * $this->compress.
	 *
	 * @param $handle resource: handle returned by open()
	 * @param $str String: data to write
	 */
	public function write( &$handle, $str ) {
		if ( $this->compress ) {
			gzwrite( $handle, $str );
		} else {
			fwrite( $handle, $str );
		}
	}

	/**
	 * Close a handle with gzclose() or fclose(), depending on
	 * $this->compress.
	 *
	 * @param $handle resource: handle returned by open()
	 */
	public function close( &$handle ) {
		if ( $this->compress ) {
			gzclose( $handle );
		} else {
			fclose( $handle );
		}
	}

	/**
	 * Build the name of a sitemap file.
	 *
	 * @param $namespace Integer: namespace id
	 * @param $count Integer: sequence number of the file in the namespace
	 * @return String file name, with a .gz suffix when compressing
	 */
	public function sitemapFilename( $namespace, $count ) {
		$ext = $this->compress ? '.gz' : '';
		return "sitemap-" . wfWikiID() . "-NS_$namespace-$count.xml$ext";
	}

	/**
	 * @return String the XML declaration followed by a newline
	 */
	public function xmlHead() {
		return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
	}

	/**
	 * @return String the XML namespace URI used for both the index file
	 *   and the sitemap files
	 */
	public function xmlSchema() {
		return 'http://www.sitemaps.org/schemas/sitemap/0.9';
	}

	/**
	 * @return String the XML declaration and opening tag of the index file
	 */
	public function openIndex() {
		return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Build the index file entry for one sitemap file.
	 *
	 * @param $filename String: name of the sitemap file
	 * @return String a <sitemap> element whose <loc> is $this->urlpath
	 *   followed by the file name, XML-escaped
	 */
	public function indexEntry( $filename ) {
		$loc = htmlspecialchars( $this->urlpath . $filename );
		return
			"\t<sitemap>\n" .
			"\t\t<loc>$loc</loc>\n" .
			"\t\t<lastmod>{$this->timestamp}</lastmod>\n" .
			"\t</sitemap>\n";
	}

	/**
	 * @return String the closing tag of the index file
	 */
	public function closeIndex() {
		return "</sitemapindex>\n";
	}

	/**
	 * @return String the XML declaration and opening tag of a sitemap file
	 */
	public function openFile() {
		return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Build the sitemap file entry for one URL.
	 *
	 * @param $url String: unescaped URL of the page
	 * @param $date String: last modification date
	 * @param $priority String: priority of the page
	 * @return String a <url> element
	 */
	public function fileEntry( $url, $date, $priority ) {
		// The URL may contain characters such as "&" (for instance as a
		// query string separator) that must not appear raw in XML.
		$loc = htmlspecialchars( $url );
		return
			"\t<url>\n" .
			"\t\t<loc>$loc</loc>\n" .
			"\t\t<lastmod>$date</lastmod>\n" .
			"\t\t<priority>$priority</priority>\n" .
			"\t</url>\n";
	}

	/**
	 * @return String the closing tag of a sitemap file
	 */
	public function closeFile() {
		return "</urlset>\n";
	}

	/**
	 * Fill $this->limit with the byte lengths of the file header, of a
	 * reference entry and of the file footer for a namespace.
	 *
	 * The reference entry is built from a 255-byte title (63 four-byte
	 * UTF-8 sequences followed by one three-byte sequence) - presumably
	 * the longest title the wiki accepts; confirm against Title limits.
	 *
	 * @param $namespace Integer: namespace id
	 */
	public function generateLimit( $namespace ) {
		$title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" );
		$now = wfTimestamp( TS_ISO_8601, wfTimestamp() );

		$this->limit = array(
			strlen( $this->openFile() ),
			strlen( $this->fileEntry( $title->getFullURL(), $now, $this->priority( $namespace ) ) ),
			strlen( $this->closeFile() )
		);
	}
}
00475
// Name the maintenance class defined in this file, then include the file
// that the DO_MAINTENANCE constant points to.
$maintClass = 'GenerateSitemap';
require_once DO_MAINTENANCE;