00001 <?php
00002
00003 require_once( dirname( __FILE__ ) . '/../Maintenance.php' );
00004
/**
 * Maintenance script that repairs "text" rows which still hold a serialized
 * HistoryBlobStub (bug 20757).
 *
 * Each stub names a secondary text row (mOldId) and a content hash (mHash).
 * When the secondary row is listed in blob_tracking, the stub row is rewritten
 * to point directly at the external store URL
 * DB://<bt_cluster>/<bt_blob_id>/<hash>, and a blob_tracking row is inserted
 * for the rewritten row. Stubs whose secondary row is not tracked are only
 * classified (good or unrecoverable); nothing is written for them.
 *
 * Progress and diagnostics are written with print, one line per affected row.
 */
class FixBug20757 extends Maintenance {
	/** Maximum number of stub rows fetched from the text table per scan query. */
	public $batchSize = 10000;

	/** Cache used by getRevTextMap(): page ID => array( rev_text_id => rev_id ). */
	public $mapCache = array();

	/** Total number of entries currently stored across all maps in $mapCache. */
	public $mapCacheSize = 0;

	/** Entry count above which getRevTextMap() evicts cached maps before adding one. */
	public $maxMapCacheSize = 1000000;

	/**
	 * Register the script description and its command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
		$this->addOption( 'dry-run', 'Report only' );
		$this->addOption( 'start', 'old_id to start at', false, true );
	}

	/**
	 * Scan the text table in batches, repair every recoverable stub and print
	 * a summary of fixed, unrecoverable and good stubs.
	 *
	 * With --dry-run the same checks and reports are made but no row is written.
	 */
	public function execute() {
		$dbr = wfGetDB( DB_SLAVE );
		$dbw = wfGetDB( DB_MASTER );

		$dryRun = $this->getOption( 'dry-run' );
		if ( $dryRun ) {
			print "Dry run only.\n";
		}

		$startId = $this->getOption( 'start', 0 );
		$numGood = 0;
		$numFixed = 0;
		$numBad = 0;

		// Only used for the progress indicator.
		$totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );

		while ( true ) {
			print "ID: $startId / $totalRevs\r";

			// Next batch of rows that look like a serialized HistoryBlobStub
			// and have not been moved to external storage.
			$res = $dbr->select(
				'text',
				array( 'old_id', 'old_flags', 'old_text' ),
				array(
					'old_id > ' . intval( $startId ),
					'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
					'LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
				),
				__METHOD__,
				array(
					'ORDER BY' => 'old_id',
					'LIMIT' => $this->batchSize,
				)
			);

			if ( !$res->numRows() ) {
				break;
			}

			$secondaryIds = array();
			$stubs = array();

			foreach ( $res as $row ) {
				// Advance the cursor first so that a bad row is never re-read.
				$startId = $row->old_id;

				$stub = $this->extractStubInfo( $row );
				if ( $stub === null ) {
					++$numBad;
					continue;
				}

				$secondaryIds[] = $stub['secondaryId'];
				$stubs[$row->old_id] = $stub;
			}

			$secondaryIds = array_unique( $secondaryIds );

			if ( !count( $secondaryIds ) ) {
				// Every row in this batch was rejected; move on to the next batch.
				continue;
			}

			// Look up all secondary rows of the batch in blob_tracking at once.
			$res = $dbr->select(
				'blob_tracking',
				'*',
				array(
					'bt_text_id' => $secondaryIds,
				),
				__METHOD__
			);
			$trackedBlobs = array();
			foreach ( $res as $row ) {
				$trackedBlobs[$row->bt_text_id] = $row;
			}

			foreach ( $stubs as $primaryId => $stub ) {
				$secondaryId = $stub['secondaryId'];

				if ( !isset( $trackedBlobs[$secondaryId] ) ) {
					// No tracking information: inspect the secondary row itself
					// and only report on it.
					$secondaryRow = $dbr->selectRow(
						'text',
						array( 'old_flags', 'old_text' ),
						array( 'old_id' => $secondaryId ),
						__METHOD__
					);
					if ( $this->checkUntrackedStub( $primaryId, $stub, $secondaryRow ) ) {
						++$numGood;
					} else {
						++$numBad;
					}
					continue;
				}
				$trackRow = $trackedBlobs[$secondaryId];

				// Build the direct external URL and verify that it really
				// yields the text the stub refers to before touching anything.
				$url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
				$text = ExternalStore::fetchFromURL( $url );
				if ( $text === false ) {
					print "$primaryId: unrecoverable: source text missing\n";
					++$numBad;
					continue;
				}
				if ( md5( $text ) !== $stub['hash'] ) {
					print "$primaryId: unrecoverable: content hashes do not match\n";
					++$numBad;
					continue;
				}

				// Find the revision that uses this text row within the tracked
				// page. If there is no page or no matching revision, both IDs
				// are recorded as 0.
				$pageId = intval( $trackRow->bt_page );
				$revId = $pageId ? $this->findTextIdInPage( $pageId, $primaryId ) : 0;
				if ( !$revId ) {
					$pageId = $revId = 0;
				}

				$newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';

				if ( !$dryRun ) {
					// Rewrite the stub row and add its tracking row in a
					// single transaction.
					$dbw->begin();
					$dbw->update(
						'text',
						// SET
						array(
							'old_flags' => $newFlags,
							'old_text' => $url
						),
						// WHERE
						array( 'old_id' => $primaryId ),
						__METHOD__
					);

					// Record the rewritten row in blob_tracking, reusing the
					// cluster and blob ID of the secondary row's entry.
					$dbw->insert( 'blob_tracking',
						array(
							'bt_page' => $pageId,
							'bt_rev_id' => $revId,
							'bt_text_id' => $primaryId,
							'bt_cluster' => $trackRow->bt_cluster,
							'bt_blob_id' => $trackRow->bt_blob_id,
							'bt_cgz_hash' => $stub['hash'],
							'bt_new_url' => null,
							'bt_moved' => 0,
						),
						__METHOD__
					);
					$dbw->commit();
					$this->waitForSlaves();
				}

				print "$primaryId: resolved to $url\n";
				++$numFixed;
			}
		}

		print "\n";
		print "Fixed: $numFixed\n";
		print "Unrecoverable: $numBad\n";
		print "Good stubs: $numGood\n";
	}

	/**
	 * Decode one candidate text row into the data needed to repair it.
	 *
	 * Prints an "unrecoverable" line and returns null when the row does not
	 * unserialize to a HistoryBlobStub object.
	 *
	 * @param $row Object: text row with old_id, old_flags and old_text
	 * @return Array with keys 'legacyEncoding' (bool, true when the flags
	 *   contain neither utf-8 nor utf8), 'secondaryId' (int) and 'hash',
	 *   or null on failure
	 */
	private function extractStubInfo( $row ) {
		$obj = unserialize( $row->old_text );
		if ( $obj === false ) {
			print "{$row->old_id}: unrecoverable: cannot unserialize\n";
			return null;
		}

		if ( !is_object( $obj ) ) {
			print "{$row->old_id}: unrecoverable: unserialized to type " .
				gettype( $obj ) . ", possible double-serialization\n";
			return null;
		}

		if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
			print "{$row->old_id}: unrecoverable: unexpected object class " .
				get_class( $obj ) . "\n";
			return null;
		}

		$flags = explode( ',', $row->old_flags );
		$hasUtf8Flag = in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags );

		return array(
			'legacyEncoding' => !$hasUtf8Flag,
			'secondaryId' => intval( $obj->mOldId ),
			'hash' => $obj->mHash,
		);
	}

	/**
	 * Classify a stub whose secondary row has no blob_tracking entry.
	 *
	 * Prints an "unrecoverable" line for every stub that is not good.
	 *
	 * @param $primaryId Integer: old_id of the stub row, used in the report
	 * @param $stub Array: stub data as returned by extractStubInfo()
	 * @param $secondaryRow Object|false: the secondary text row, if it exists
	 * @return Boolean: true if the stub still resolves (see isUnbrokenStub())
	 */
	private function checkUntrackedStub( $primaryId, $stub, $secondaryRow ) {
		if ( !$secondaryRow ) {
			print "$primaryId: unrecoverable: secondary row is missing\n";
			return false;
		}
		if ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
			// The stub still resolves through its secondary row; leave it alone.
			return true;
		}
		if ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
			print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
			return false;
		}
		print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
		return false;
	}

	/**
	 * Throttle writes: on every 50th call, block in wfWaitForSlaves( 5 ).
	 *
	 * The call counter is a function-static variable, so it persists across
	 * calls for the lifetime of the script.
	 */
	public function waitForSlaves() {
		static $iteration = 0;
		// Count each call exactly once. The previous condition
		// "++$iteration > 50 == 0" incremented twice and, due to operator
		// precedence, was true on every call.
		if ( ++$iteration >= 50 ) {
			wfWaitForSlaves( 5 );
			$iteration = 0;
		}
	}

	/**
	 * Find the revision of a page that uses the given text row.
	 *
	 * @param $pageId Integer: page to search
	 * @param $textId Integer: text row ID (rev_text_id) to look for
	 * @return The matching rev_id, or null if the page has no such revision
	 */
	public function findTextIdInPage( $pageId, $textId ) {
		$ids = $this->getRevTextMap( $pageId );
		return isset( $ids[$textId] ) ? $ids[$textId] : null;
	}

	/**
	 * Get the rev_text_id => rev_id map of a page, loading it from the
	 * revision table on first use and caching it in $mapCache.
	 *
	 * @param $pageId Integer
	 * @return Array: rev_text_id => rev_id
	 */
	public function getRevTextMap( $pageId ) {
		if ( isset( $this->mapCache[$pageId] ) ) {
			return $this->mapCache[$pageId];
		}

		// Limit memory usage: drop the oldest cached maps while the cache is
		// over its size limit. The emptiness check keeps the loop finite even
		// if the size counter is out of sync with the cache contents.
		while ( $this->mapCacheSize > $this->maxMapCacheSize && count( $this->mapCache ) ) {
			reset( $this->mapCache );
			$key = key( $this->mapCache );
			$this->mapCacheSize -= count( $this->mapCache[$key] );
			unset( $this->mapCache[$key] );
		}

		$dbr = wfGetDB( DB_SLAVE );
		$map = array();
		$res = $dbr->select( 'revision',
			array( 'rev_id', 'rev_text_id' ),
			array( 'rev_page' => $pageId ),
			__METHOD__
		);
		foreach ( $res as $row ) {
			$map[$row->rev_text_id] = $row->rev_id;
		}
		$this->mapCache[$pageId] = $map;
		$this->mapCacheSize += count( $map );

		return $map;
	}

	/**
	 * Check whether a stub can still be resolved through its secondary row.
	 *
	 * The secondary row must carry the 'object' flag and decode (optionally
	 * via external storage and/or gzip) to an object whose getItem() returns
	 * something other than false for the stub's hash.
	 *
	 * @param $stub Array: stub data; only the 'hash' key is read
	 * @param $secondaryRow Object: text row with old_flags and old_text
	 * @return Boolean
	 */
	public function isUnbrokenStub( $stub, $secondaryRow ) {
		$flags = explode( ',', $secondaryRow->old_flags );

		// Without the object flag the result is always false, so check it
		// before doing any external fetch.
		if ( !in_array( 'object', $flags ) ) {
			return false;
		}

		$text = $secondaryRow->old_text;
		if ( in_array( 'external', $flags ) ) {
			$url = $text;
			$parts = explode( '://', $url, 2 );
			if ( !isset( $parts[1] ) || $parts[1] == "" ) {
				// Not a usable store URL.
				return false;
			}
			$text = ExternalStore::fetchFromURL( $url );
			if ( $text === false ) {
				return false;
			}
		}

		if ( in_array( 'gzip', $flags ) ) {
			$text = gzinflate( $text );
			if ( $text === false ) {
				return false;
			}
		}

		$obj = unserialize( $text );
		if ( !is_object( $obj ) ) {
			// The first pass may have produced a serialized string
			// (double serialization); anything else cannot be decoded further.
			$obj = is_string( $obj ) ? unserialize( $obj ) : false;
		}

		// Refuse objects that cannot be queried instead of failing with a
		// fatal "undefined method" error.
		if ( !is_object( $obj )
			|| !method_exists( $obj, 'uncompress' )
			|| !method_exists( $obj, 'getItem' )
		) {
			return false;
		}

		$obj->uncompress();
		return $obj->getItem( $stub['hash'] ) !== false;
	}
}
00311
// Name the class defined in this file, then load the file that the
// DO_MAINTENANCE constant points to (presumably the runner that reads
// $maintClass and executes it - confirm against Maintenance.php).
$maintClass = 'FixBug20757';
require_once( DO_MAINTENANCE );
00314