00001 <?php
00030 require_once( dirname(__FILE__) . '/Maintenance.php' );
00031
/**
 * Maintenance script that reports, and optionally repairs, inconsistencies
 * between the page and revision tables:
 *  - "orphan" revisions whose rev_page matches no row in page;
 *  - "childless" pages whose page_latest matches no row in revision
 *    (this check exists but is disabled in execute());
 *  - pages whose page_latest revision does not carry the newest timestamp
 *    among the revisions of that page.
 *
 * Nothing is written to the database unless the --fix option is given.
 */
class Orphans extends Maintenance {
	/**
	 * Set the script description and register the --fix option.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Look for 'orphan' revisions hooked to pages which don't exist\n" .
			"And 'childless' pages with no revisions\n" .
			"Then, kill the poor widows and orphans\n" .
			"Man this is depressing";
		$this->addOption( 'fix', 'Actually fix broken entries' );
	}

	/**
	 * Entry point: run the enabled checks, repairing only when --fix is set.
	 */
	public function execute() {
		global $wgTitle;
		$wgTitle = Title::newFromText( 'Orphan revision cleanup script' );

		$fix = $this->hasOption( 'fix' );
		$this->checkOrphans( $fix );
		$this->checkSeparation( $fix );
		# Does not work yet, do not use
		# $this->checkWidows( $fix );
	}

	/**
	 * Lock the page, revision and redirect tables, plus one optional extra
	 * table, before a repair run.
	 *
	 * @param $db Database connection to take the locks on
	 * @param $extraTable String|null Name of an additional table to lock
	 */
	private function lockTables( &$db, $extraTable = null ) {
		$tbls = array( 'page', 'revision', 'redirect' );
		if ( $extraTable ) {
			$tbls[] = $extraTable;
		}
		# Empty first list, all tables go in the second list.
		$db->lockTables( array(), $tbls, __METHOD__, false );
	}

	/**
	 * List revisions whose rev_page has no matching page row and, when
	 * $fix is true, delete each of them from the revision table.
	 *
	 * @param $fix Boolean Whether to delete the orphan revisions found
	 */
	private function checkOrphans( $fix ) {
		$dbw = wfGetDB( DB_MASTER );
		$page = $dbw->tableName( 'page' );
		$revision = $dbw->tableName( 'revision' );

		if ( $fix ) {
			$this->lockTables( $dbw );
		}

		$this->output( "Checking for orphan revision table entries... (this may take a while on a large wiki)\n" );
		$result = $dbw->query( "
			SELECT *
			FROM $revision LEFT OUTER JOIN $page ON rev_page=page_id
			WHERE page_id IS NULL
		" );
		$orphans = $dbw->numRows( $result );
		if ( $orphans > 0 ) {
			global $wgContLang;
			$this->output( "$orphans orphan revisions...\n" );
			$this->output( sprintf( "%10s %10s %14s %20s %s\n", 'rev_id', 'rev_page', 'rev_timestamp', 'rev_user_text', 'rev_comment' ) );
			foreach ( $result as $row ) {
				$comment = ( $row->rev_comment == '' )
					? ''
					: '(' . $wgContLang->truncate( $row->rev_comment, 40 ) . ')';
				$this->output( sprintf( "%10d %10d %14s %20s %s\n",
					$row->rev_id,
					$row->rev_page,
					$row->rev_timestamp,
					$wgContLang->truncate( $row->rev_user_text, 17 ),
					$comment ) );
				if ( $fix ) {
					$dbw->delete( 'revision', array( 'rev_id' => $row->rev_id ) );
				}
			}
			if ( !$fix ) {
				$this->output( "Run again with --fix to remove these entries automatically.\n" );
			}
		} else {
			$this->output( "No orphans! Yay!\n" );
		}

		if ( $fix ) {
			$dbw->unlockTables( __METHOD__ );
		}
	}

	/**
	 * List pages whose page_latest has no matching revision row and, when
	 * $fix is true, delete each of them from the page table.
	 *
	 * The call to this method in execute() is commented out with the note
	 * "Does not work yet, do not use".
	 *
	 * @param $fix Boolean Whether to delete the childless pages found
	 */
	private function checkWidows( $fix ) {
		$dbw = wfGetDB( DB_MASTER );
		$page = $dbw->tableName( 'page' );
		$revision = $dbw->tableName( 'revision' );

		if ( $fix ) {
			$this->lockTables( $dbw );
		}

		$this->output( "\nChecking for childless page table entries... (this may take a while on a large wiki)\n" );
		$result = $dbw->query( "
			SELECT *
			FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
			WHERE rev_id IS NULL
		" );
		$widows = $dbw->numRows( $result );
		if ( $widows > 0 ) {
			$this->output( "$widows childless pages...\n" );
			$this->output( sprintf( "%10s %11s %2s %s\n", 'page_id', 'page_latest', 'ns', 'page_title' ) );
			foreach ( $result as $row ) {
				# Report through output() like every other message of this
				# script instead of printing directly.
				$this->output( sprintf( "%10d %11d %2d %s\n",
					$row->page_id,
					$row->page_latest,
					$row->page_namespace,
					$row->page_title ) );
				if ( $fix ) {
					$dbw->delete( 'page', array( 'page_id' => $row->page_id ) );
				}
			}
			if ( !$fix ) {
				$this->output( "Run again with --fix to remove these entries automatically.\n" );
			}
		} else {
			$this->output( "No childless pages! Yay!\n" );
		}

		if ( $fix ) {
			$dbw->unlockTables( __METHOD__ );
		}
	}

	/**
	 * List pages whose page_latest revision timestamp differs from the
	 * newest rev_timestamp recorded for that page and, when $fix is true,
	 * point each such page at the revision carrying that newest timestamp.
	 *
	 * @param $fix Boolean Whether to update the pages found
	 */
	private function checkSeparation( $fix ) {
		$dbw = wfGetDB( DB_MASTER );
		$page = $dbw->tableName( 'page' );
		$revision = $dbw->tableName( 'revision' );

		if ( $fix ) {
			# Use the helper of this class so that page, revision and
			# redirect are locked together with text.
			$this->lockTables( $dbw, 'text' );
		}

		$this->output( "\nChecking for pages whose page_latest links are incorrect... (this may take a while on a large wiki)\n" );
		$result = $dbw->query( "
			SELECT *
			FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
		" );
		$found = 0;
		foreach ( $result as $row ) {
			# Force an integer before interpolating into raw SQL.
			$pageId = intval( $row->page_id );
			$result2 = $dbw->query( "
				SELECT MAX(rev_timestamp) as max_timestamp
				FROM $revision
				WHERE rev_page=$pageId
			" );
			$row2 = $dbw->fetchObject( $result2 );
			$dbw->freeResult( $result2 );

			if ( !$row2 ) {
				$this->output( "wtf\n" );
				continue;
			}
			if ( $row->rev_timestamp == $row2->max_timestamp ) {
				# page_latest already carries the newest timestamp.
				continue;
			}

			if ( $found == 0 ) {
				# Print the column header once, before the first bad row.
				$this->output( sprintf( "%10s %10s %14s %14s\n",
					'page_id', 'rev_id', 'timestamp', 'max timestamp' ) );
			}
			++$found;
			$this->output( sprintf( "%10d %10d %14s %14s\n",
				$row->page_id,
				$row->page_latest,
				$row->rev_timestamp,
				$row2->max_timestamp ) );

			if ( !$fix ) {
				continue;
			}

			# Look up the revision that carries the newest timestamp.
			$maxId = $dbw->selectField(
				'revision',
				'rev_id',
				array(
					'rev_page' => $row->page_id,
					'rev_timestamp' => $row2->max_timestamp ) );
			if ( !$maxId ) {
				# No candidate revision: nothing to point the page at.
				$this->output( "... no revision found for page {$row->page_id}, skipping\n" );
				continue;
			}
			$this->output( "... updating to revision $maxId\n" );
			$maxRev = Revision::newFromId( $maxId );
			if ( !$maxRev ) {
				$this->output( "... could not load revision $maxId, skipping\n" );
				continue;
			}
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$article = new Article( $title );
			$article->updateRevisionOn( $dbw, $maxRev );
		}

		if ( $found ) {
			$this->output( "Found $found pages with incorrect latest revision.\n" );
		} else {
			$this->output( "No pages with incorrect latest revision. Yay!\n" );
		}
		if ( !$fix && $found > 0 ) {
			$this->output( "Run again with --fix to remove these entries automatically.\n" );
		}

		if ( $fix ) {
			$dbw->unlockTables( __METHOD__ );
		}
	}
}
00235
# Name the maintenance class defined in this file, then include the
# file that the DO_MAINTENANCE constant points to.
$maintClass = 'Orphans';
require_once DO_MAINTENANCE;