00001 <?php
00002
/**
 * Invalidates the cached HTML of all pages that link to a given title
 * through a given links table.
 *
 * Small backlink sets are invalidated immediately. Large sets are split
 * into HTMLCacheUpdateJob instances, each covering a range of page IDs,
 * and handed to Job::batchInsert().
 */
class HTMLCacheUpdate
{
	/** Title object whose backlinks are being invalidated */
	public $mTitle;

	/** Name of the links table passed to the backlink cache */
	public $mTable;

	/** Not assigned anywhere in this class; kept for compatibility */
	public $mPrefix;

	/** First page ID of the partition to process, or false for no lower bound */
	public $mStart;

	/** Last page ID of the partition to process, or false for no upper bound */
	public $mEnd;

	/** Target number of rows per job, copied from $wgUpdateRowsPerJob */
	public $mRowsPerJob;

	/** Number of page IDs per UPDATE query, copied from $wgUpdateRowsPerQuery */
	public $mRowsPerQuery;

	/** Backlink cache obtained from $mTitle->getBacklinkCache() */
	public $mCache;

	/**
	 * @param $titleTo Title object; must provide getBacklinkCache()
	 * @param $table String: name of the links table
	 * @param $start Mixed: first page ID of the partition, or false
	 * @param $end Mixed: last page ID of the partition, or false
	 */
	public function __construct( $titleTo, $table, $start = false, $end = false ) {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		$this->mTitle = $titleTo;
		$this->mTable = $table;
		$this->mStart = $start;
		$this->mEnd = $end;
		$this->mRowsPerJob = $wgUpdateRowsPerJob;
		$this->mRowsPerQuery = $wgUpdateRowsPerQuery;
		$this->mCache = $this->mTitle->getBacklinkCache();
	}

	/**
	 * Run the update.
	 *
	 * When a start or end ID was given, only that partition is processed
	 * and the 'HTMLCacheUpdate::doUpdate' hook is NOT run. Otherwise the
	 * whole backlink set is either invalidated directly or split into
	 * jobs, and the hook is run afterwards.
	 */
	public function doUpdate() {
		if ( $this->mStart || $this->mEnd ) {
			$this->doPartialUpdate();
			return;
		}

		# Get an estimate of the number of rows from the BacklinkCache
		$numRows = $this->mCache->getNumLinks( $this->mTable );
		if ( $numRows > $this->mRowsPerJob * 2 ) {
			# Do fast cached partition
			$this->insertJobs();
		} else {
			# Get the links from the DB
			$titleArray = $this->mCache->getLinks( $this->mTable );
			# Check if the row count estimate was correct
			if ( $titleArray->count() > $this->mRowsPerJob * 2 ) {
				# Not correct, do accurate partition
				wfDebug( __METHOD__.": row count estimate was incorrect, repartitioning\n" );
				$this->insertJobsFromTitles( $titleArray );
			} else {
				$this->invalidateTitles( $titleArray );
			}
		}
		wfRunHooks( 'HTMLCacheUpdate::doUpdate', array($this->mTitle) );
	}

	/**
	 * Update the partition delimited by $mStart and $mEnd.
	 *
	 * The partition is invalidated directly when it holds at most twice
	 * $mRowsPerJob titles; otherwise it is divided into further jobs.
	 */
	protected function doPartialUpdate() {
		$titleArray = $this->mCache->getLinks( $this->mTable, $this->mStart, $this->mEnd );
		if ( $titleArray->count() <= $this->mRowsPerJob * 2 ) {
			# This partition is small enough, do the update
			$this->invalidateTitles( $titleArray );
		} else {
			# Partitioning was excessively inaccurate. Divide the job further.
			# This can occur when a large number of links are added in a short
			# period of time, say by updating a heavily-used template.
			$this->insertJobsFromTitles( $titleArray );
		}
	}

	/**
	 * Partition the given titles into jobs of about $mRowsPerJob titles
	 * each and insert those jobs.
	 *
	 * Falls back to invalidating the titles directly when the
	 * partitioning yields fewer than two jobs.
	 *
	 * @param $titleArray Iterable list of title objects; the ID ranges
	 *   computed below presumably rely on ascending page ID order
	 *   (TODO confirm against the backlink cache)
	 */
	protected function insertJobsFromTitles( $titleArray ) {
		# We make subpartitions in the sense that the start of the first job
		# will be the start of the parent partition, and the end of the last
		# job will be the end of the parent partition.
		$jobs = array();
		$start = $this->mStart; # start of the current job
		$numTitles = 0;
		foreach ( $titleArray as $title ) {
			$id = $title->getArticleID();
			# $numTitles is now the number of titles in the current job not
			# including the current ID
			if ( $numTitles >= $this->mRowsPerJob ) {
				# Add a job up to but not including the current ID
				$params = array(
					'table' => $this->mTable,
					'start' => $start,
					'end' => $id - 1
				);
				$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
				$start = $id;
				$numTitles = 0;
			}
			$numTitles++;
		}
		# Last job
		$params = array(
			'table' => $this->mTable,
			'start' => $start,
			'end' => $this->mEnd
		);
		$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
		wfDebug( __METHOD__.": repartitioning into " . count( $jobs ) . " jobs\n" );

		if ( count( $jobs ) < 2 ) {
			# I don't think this is possible at present, but handling this case
			# makes the code a bit more robust against future code updates and
			# avoids a potential infinite loop of repartitioning
			wfDebug( __METHOD__.": repartitioning failed!\n" );
			$this->invalidateTitles( $titleArray );
			return;
		}

		Job::batchInsert( $jobs );
	}

	/**
	 * Insert one job per batch returned by the backlink cache's
	 * partition() for this table. Does nothing when there are no batches.
	 */
	protected function insertJobs() {
		$batches = $this->mCache->partition( $this->mTable, $this->mRowsPerJob );
		if ( !$batches ) {
			return;
		}
		$jobs = array();
		foreach ( $batches as $batch ) {
			# Each batch is a pair: index 0 is the start ID, index 1 the end ID
			$params = array(
				'table' => $this->mTable,
				'start' => $batch[0],
				'end' => $batch[1],
			);
			$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
		}
		Job::batchInsert( $jobs );
	}

	/**
	 * Invalidate the backlinking pages in a range of page IDs directly,
	 * without going through the job partitioning.
	 *
	 * @param $startId Mixed: first page ID, or false
	 * @param $endId Mixed: last page ID, or false
	 */
	public function invalidate( $startId = false, $endId = false ) {
		$titleArray = $this->mCache->getLinks( $this->mTable, $startId, $endId );
		$this->invalidateTitles( $titleArray );
	}

	/**
	 * Invalidate a list of titles: set page_touched on their page rows
	 * and, depending on configuration, update Squid and clear the file
	 * cache.
	 *
	 * @param $titleArray Iterable list of title objects
	 */
	protected function invalidateTitles( $titleArray ) {
		global $wgUseFileCache, $wgUseSquid;

		# Get all IDs in this query into an array
		$ids = array();
		foreach ( $titleArray as $title ) {
			$ids[] = $title->getArticleID();
		}

		# Nothing to invalidate: return before requesting a master
		# connection, so an empty backlink set causes no DB access at all
		if ( !$ids ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$timestamp = $dbw->timestamp();

		# Update page_touched, at most $mRowsPerQuery rows per query
		$batches = array_chunk( $ids, $this->mRowsPerQuery );
		foreach ( $batches as $batch ) {
			$dbw->update( 'page',
				array( 'page_touched' => $timestamp ),
				array( 'page_id IN (' . $dbw->makeList( $batch ) . ')' ),
				__METHOD__
			);
		}

		# Update squid
		if ( $wgUseSquid ) {
			$u = SquidUpdate::newFromTitles( $titleArray );
			$u->doUpdate();
		}

		# Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}

}
00205
/**
 * Job wrapper around HTMLCacheUpdate: carries a links table name and a
 * page ID range, and runs the corresponding update when executed.
 */
class HTMLCacheUpdateJob extends Job {
	/** Name of the links table, taken from $params['table'] */
	public $table;

	/** Start of the page ID range, taken from $params['start'] */
	public $start;

	/** End of the page ID range, taken from $params['end'] */
	public $end;

	/**
	 * @param $title Title object passed on to the parent Job and to HTMLCacheUpdate
	 * @param $params Array with the keys 'table', 'start' and 'end'
	 * @param $id Integer: job ID, forwarded to the parent constructor
	 */
	public function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params, $id );

		# Mirror the job parameters onto same-named properties
		foreach ( array( 'table', 'start', 'end' ) as $key ) {
			$this->$key = $params[$key];
		}
	}

	/**
	 * Build an HTMLCacheUpdate for the stored table and ID range and run it.
	 *
	 * @return Boolean: always true
	 */
	public function run() {
		$updater = new HTMLCacheUpdate(
			$this->title,
			$this->table,
			$this->start,
			$this->end
		);
		$updater->doUpdate();

		return true;
	}
}