<?php

/**
 * Invalidates the cached HTML of the pages returned by the backlink cache
 * of a given title for a given link table.
 *
 * Small link sets are invalidated immediately; large ones are split into
 * HTMLCacheUpdateJob instances, each covering a range of page IDs, and
 * handed to Job::batchInsert().
 */
class HTMLCacheUpdate
{
	public $mTitle, $mTable, $mPrefix, $mStart, $mEnd;
	public $mRowsPerJob, $mRowsPerQuery;

	/**
	 * Backlink cache obtained from $mTitle->getBacklinkCache().
	 *
	 * Declared explicitly: it used to be created as a dynamic property by the
	 * constructor, which is deprecated as of PHP 8.2. It stays public because
	 * dynamic properties are public, so existing readers keep working.
	 */
	public $mCache;

	/**
	 * @param $titleTo Title whose backlinks should be invalidated; must
	 *   provide getBacklinkCache()
	 * @param $table string Name of the link table, passed through to the
	 *   backlink cache
	 * @param $start mixed Lower bound of the range to process, or false for
	 *   no lower bound (presumably a page ID -- see insertJobsFromTitles())
	 * @param $end mixed Upper bound of the range to process, or false for no
	 *   upper bound
	 */
	public function __construct( $titleTo, $table, $start = false, $end = false ) {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		$this->mTitle = $titleTo;
		$this->mTable = $table;
		$this->mStart = $start;
		$this->mEnd = $end;
		$this->mRowsPerJob = $wgUpdateRowsPerJob;
		$this->mRowsPerQuery = $wgUpdateRowsPerQuery;
		$this->mCache = $this->mTitle->getBacklinkCache();
	}

	/**
	 * Entry point: invalidate directly or queue jobs, depending on size.
	 *
	 * When a start or end bound is set, this object represents one partition
	 * and only doPartialUpdate() is run; note that the
	 * 'HTMLCacheUpdate::doUpdate' hook is NOT run in that case.
	 */
	public function doUpdate() {
		if ( $this->mStart || $this->mEnd ) {
			$this->doPartialUpdate();
			return;
		}

		# Get an estimate of the number of rows from the BacklinkCache
		$numRows = $this->mCache->getNumLinks( $this->mTable );
		if ( $numRows > $this->mRowsPerJob * 2 ) {
			# Do fast cached partition
			$this->insertJobs();
		} else {
			# Get the links from the DB
			$titleArray = $this->mCache->getLinks( $this->mTable );
			# Check if the row count estimate was correct
			if ( $titleArray->count() > $this->mRowsPerJob * 2 ) {
				# Not correct, do accurate partition
				wfDebug( __METHOD__.": row count estimate was incorrect, repartitioning\n" );
				$this->insertJobsFromTitles( $titleArray );
			} else {
				$this->invalidateTitles( $titleArray );
			}
		}
		wfRunHooks( 'HTMLCacheUpdate::doUpdate', array($this->mTitle) );
	}

	/**
	 * Update the partition bounded by $mStart and $mEnd.
	 *
	 * The partition is invalidated directly if it holds at most
	 * 2 * $mRowsPerJob titles; otherwise it is split into further jobs.
	 */
	protected function doPartialUpdate() {
		$titleArray = $this->mCache->getLinks( $this->mTable, $this->mStart, $this->mEnd );
		if ( $titleArray->count() <= $this->mRowsPerJob * 2 ) {
			# This partition is small enough, do the update
			$this->invalidateTitles( $titleArray );
		} else {
			# Partitioning was excessively inaccurate. Divide the job further.
			# This can occur when a large number of links are added in a short
			# period of time, say by updating a heavily-used template.
			$this->insertJobsFromTitles( $titleArray );
		}
	}

	/**
	 * Split a list of titles into jobs of about $mRowsPerJob titles each and
	 * insert them with Job::batchInsert().
	 *
	 * NOTE(review): the range arithmetic ('end' => $id - 1) presumably relies
	 * on $titleArray being ordered by ascending page ID -- confirm against
	 * BacklinkCache::getLinks().
	 *
	 * @param $titleArray Iterable collection of title objects providing
	 *   getArticleID()
	 */
	protected function insertJobsFromTitles( $titleArray ) {
		# We make subpartitions in the sense that the start of the first job
		# will be the start of the parent partition, and the end of the last
		# job will be the end of the parent partition.
		$jobs = array();
		$start = $this->mStart; # start of the current job
		$numTitles = 0;
		foreach ( $titleArray as $title ) {
			$id = $title->getArticleID();
			# $numTitles is now the number of titles in the current job not
			# including the current ID
			if ( $numTitles >= $this->mRowsPerJob ) {
				# Add a job up to but not including the current ID
				$params = array(
					'table' => $this->mTable,
					'start' => $start,
					'end' => $id - 1
				);
				$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
				$start = $id;
				$numTitles = 0;
			}
			$numTitles++;
		}
		# Last job
		$params = array(
			'table' => $this->mTable,
			'start' => $start,
			'end' => $this->mEnd
		);
		$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
		wfDebug( __METHOD__.": repartitioning into " . count( $jobs ) . " jobs\n" );

		if ( count( $jobs ) < 2 ) {
			# I don't think this is possible at present, but handling this case
			# makes the code a bit more robust against future code updates and
			# avoids a potential infinite loop of repartitioning
			wfDebug( __METHOD__.": repartitioning failed!\n" );
			$this->invalidateTitles( $titleArray );
			return;
		}

		Job::batchInsert( $jobs );
	}

	/**
	 * Queue one job per batch returned by the backlink cache's partition()
	 * for $mTable and $mRowsPerJob.
	 *
	 * Each batch is read as a two-element array: [0] is the job's 'start'
	 * and [1] its 'end'. Nothing is queued when partition() returns an empty
	 * (falsy) result.
	 */
	protected function insertJobs() {
		$batches = $this->mCache->partition( $this->mTable, $this->mRowsPerJob );
		if ( !$batches ) {
			return;
		}
		$jobs = array();
		foreach ( $batches as $batch ) {
			$params = array(
				'table' => $this->mTable,
				'start' => $batch[0],
				'end' => $batch[1],
			);
			$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
		}
		Job::batchInsert( $jobs );
	}

	/**
	 * Invalidate the titles that the backlink cache returns for the given
	 * range, without any partitioning or job queueing.
	 *
	 * @param $startId mixed Lower bound passed to getLinks(), or false
	 * @param $endId mixed Upper bound passed to getLinks(), or false
	 */
	public function invalidate( $startId = false, $endId = false ) {
		$titleArray = $this->mCache->getLinks( $this->mTable, $startId, $endId );
		$this->invalidateTitles( $titleArray );
	}

	/**
	 * Invalidate a set of titles: set page_touched on their page rows, then
	 * purge Squid and the file cache where those are enabled.
	 *
	 * Returns without touching anything when no page IDs were collected.
	 *
	 * @param $titleArray Iterable collection of title objects providing
	 *   getArticleID(); it is traversed more than once
	 */
	protected function invalidateTitles( $titleArray ) {
		global $wgUseFileCache, $wgUseSquid;

		$dbw = wfGetDB( DB_MASTER );
		# One timestamp is shared by every batch below
		$timestamp = $dbw->timestamp();

		# Get all IDs in this query into an array
		$ids = array();
		foreach ( $titleArray as $title ) {
			$ids[] = $title->getArticleID();
		}

		if ( !$ids ) {
			return;
		}

		# Update page_touched, at most $mRowsPerQuery rows per UPDATE
		$batches = array_chunk( $ids, $this->mRowsPerQuery );
		foreach ( $batches as $batch ) {
			$dbw->update( 'page',
				array( 'page_touched' => $timestamp ),
				array( 'page_id IN (' . $dbw->makeList( $batch ) . ')' ),
				__METHOD__
			);
		}

		# Update squid
		if ( $wgUseSquid ) {
			$u = SquidUpdate::newFromTitles( $titleArray );
			$u->doUpdate();
		}

		# Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}

}

/**
 * Job wrapper for HTMLCacheUpdate, registered under the job type
 * 'htmlCacheUpdate'.
 *
 * Expects the parameters 'table', 'start' and 'end', which are forwarded to
 * the HTMLCacheUpdate constructor when the job runs.
 */
class HTMLCacheUpdateJob extends Job {
	public $table, $start, $end;

	/**
	 * @param $title Title passed on to the parent Job constructor
	 * @param $params array Must contain the keys 'table', 'start' and 'end'
	 * @param $id int Job id passed on to the parent Job constructor
	 */
	public function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params, $id );
		$this->table = $params['table'];
		$this->start = $params['start'];
		$this->end = $params['end'];
	}

	/**
	 * Run the update for this job's table and range.
	 *
	 * @return bool Always true
	 */
	public function run() {
		# $this->title is not set in this class; presumably the parent Job
		# constructor stores it -- confirm against Job
		$update = new HTMLCacheUpdate( $this->title, $this->table, $this->start, $this->end );
		$update->doUpdate();
		return true;
	}
}