00001 <?php
00024 require_once( dirname(__FILE__) . '/Maintenance.php' );
00025
/**
 * Maintenance script that converts the links table from the old schema
 * (l_from is a title string, l_to is an ID) to the new schema (ID->ID).
 *
 * Converted rows are written to a freshly created links_temp table. Unless
 * $overwriteLinksTable in execute() is set to false, the old links table is
 * then renamed to links_backup and links_temp takes its place.
 */
class ConvertLinks extends Maintenance {

	/**
	 * Don't create keys, and so allow duplicates in the new links table.
	 * This gives a huge speed improvement for very large links tables which
	 * are MyISAM. (What about InnoDB?)
	 */
	private $noKeys = false;

	/** Whether to write performance data to a file. */
	private $logPerformance = false;

	/** Open handle of the performance log file, or null when none is open. */
	private $perfLogHandle = null;

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Convert from the old links schema (string->ID) to the new schema (ID->ID)
The wiki should be put into read-only mode while this script executes";
	}

	/**
	 * Run the conversion.
	 *
	 * Steps: bail out early on Postgres or when l_from is already an int
	 * column; build a title => cur_id map from the cur table; copy the links
	 * table in chunks into links_temp, replacing each l_from title by its ID
	 * and counting rows whose title is unknown; finally (optionally) swap
	 * links_temp in for links, keeping the old table as links_backup.
	 */
	public function execute() {
		global $wgDBtype, $wgLang;

		if ( $wgDBtype == 'postgres' ) {
			$this->output( "Links table already ok on Postgres.\n" );
			return;
		}

		$this->output( "Converting links table to ID-ID...\n" );

		$numBadLinks = $curRowsRead = 0; # counters etc
		$totalTuplesInserted = 0; # total tuples INSERTed into links_temp

		$reportCurReadProgress = true; # whether or not to give progress reports while reading IDs from cur table
		$curReadReportInterval = 1000; # number of rows between progress reports

		$reportLinksConvProgress = true; # whether or not to give progress reports during conversion
		$linksConvInsertInterval = 1000; # number of rows per INSERT

		$initialRowOffset = 0;

		# Overwrite the old links table with the new one. If this is set to false,
		# the new table will be left at links_temp.
		$overwriteLinksTable = true;

		$perfLogFilename = "convLinksPerf.txt";
		#--------------------------------------------------------------------

		$dbw = wfGetDB( DB_MASTER );
		list( $cur, $links, $links_temp, $links_backup ) =
			$dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );

		# An integer l_from column means the table already uses the new schema.
		$res = $dbw->query( "SELECT l_from FROM $links LIMIT 1", __METHOD__ );
		$alreadyConverted = ( $dbw->fieldType( $res, 0 ) == "int" );
		$dbw->freeResult( $res );
		if ( $alreadyConverted ) {
			$this->output( "Schema already converted\n" );
			return;
		}

		$res = $dbw->query( "SELECT COUNT(*) AS count FROM $links", __METHOD__ );
		$row = $dbw->fetchObject( $res );
		$numRows = $row->count;
		$dbw->freeResult( $res );

		if ( $numRows == 0 ) {
			$this->output( "Updating schema (no rows to convert)...\n" );
			if ( !$this->createTempTable() ) {
				# Nothing to swap in; the failure has already been reported.
				return;
			}
		} else {
			if ( $this->logPerformance ) {
				$this->perfLogHandle = fopen( $perfLogFilename, "w" );
				if ( !$this->perfLogHandle ) {
					# Keep converting; performanceLog() is a no-op without a handle.
					$this->perfLogHandle = null;
					$this->output( "Could not open $perfLogFilename; performance data will not be logged.\n" );
				}
			}
			$baseTime = $startTime = $this->getMicroTime();

			# Create a title -> cur_id map
			$this->output( "Loading IDs from $cur table...\n" );
			$this->performanceLog( "Reading $numRows rows from cur table...\n" );
			$this->performanceLog( "rows read vs seconds elapsed:\n" );

			$dbw->bufferResults( false );
			$res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur", __METHOD__ );
			$ids = array();

			while ( $row = $dbw->fetchObject( $res ) ) {
				$title = $row->cur_title;
				if ( $row->cur_namespace ) {
					# Non-main namespaces are keyed as "Namespace:Title".
					$title = $wgLang->getNsText( $row->cur_namespace ) . ":$title";
				}
				$ids[$title] = $row->cur_id;
				$curRowsRead++;
				if ( $reportCurReadProgress && ( $curRowsRead % $curReadReportInterval ) == 0 ) {
					$this->performanceLog( $curRowsRead . " " . ( $this->getMicroTime() - $baseTime ) . "\n" );
					$this->output( "\t$curRowsRead rows of $cur table read.\n" );
				}
			}
			$dbw->freeResult( $res );
			$dbw->bufferResults( true );
			$this->output( "Finished loading IDs.\n\n" );
			$this->performanceLog( "Took " . ( $this->getMicroTime() - $baseTime ) . " seconds to load IDs.\n\n" );
			#--------------------------------------------------------------------

			# Now, step through the links table (in chunks of $linksConvInsertInterval rows),
			# convert, and write to the new table.
			if ( !$this->createTempTable() ) {
				# Without links_temp every INSERT below would fail.
				$this->closePerformanceLog();
				return;
			}
			$this->performanceLog( "Resetting timer.\n\n" );
			$baseTime = $this->getMicroTime();
			$this->output( "Processing $numRows rows from $links table...\n" );
			$this->performanceLog( "Processing $numRows rows from $links table...\n" );
			$this->performanceLog( "rows inserted vs seconds elapsed:\n" );

			# With keys, duplicates are rejected by the unique index, so they must be ignored.
			$insertVerb = $this->noKeys ? "INSERT" : "INSERT IGNORE";
			$insertPrefix = "$insertVerb INTO $links_temp (l_from,l_to) VALUES ";

			# NOTE(review): the chunks are paged with LIMIT/offset and no ORDER BY;
			# this presumably relies on a stable scan order of the links table,
			# which is one reason the wiki should be read-only meanwhile -- confirm.
			for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval ) {
				$sqlRead = $dbw->limitResult( "SELECT * FROM $links ", $linksConvInsertInterval, $rowOffset );
				$res = $dbw->query( $sqlRead, __METHOD__ );

				$tuples = array(); # value tuples for this chunk's INSERT
				while ( $row = $dbw->fetchObject( $res ) ) {
					$fromTitle = $row->l_from;
					if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
						$from = $ids[$fromTitle];
						$to = $row->l_to;
						$tuples[] = "($from,$to)";
					} else { # invalid title
						$numBadLinks++;
					}
				}
				$dbw->freeResult( $res );

				$tuplesAdded = count( $tuples );
				if ( $tuplesAdded != 0 ) {
					if ( $reportLinksConvProgress ) {
						$this->output( "Inserting $tuplesAdded tuples into $links_temp..." );
					}
					$dbw->query( $insertPrefix . implode( ",", $tuples ), __METHOD__ );
					$totalTuplesInserted += $tuplesAdded;
					if ( $reportLinksConvProgress ) {
						$this->output( " done. Total $totalTuplesInserted tuples inserted.\n" );
					}
					$this->performanceLog( $totalTuplesInserted . " " . ( $this->getMicroTime() - $baseTime ) . "\n" );
				}
			}
			$this->output( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n" );
			$this->performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" );
			$this->performanceLog( "Total execution time: " . ( $this->getMicroTime() - $startTime ) . " seconds.\n" );
			$this->closePerformanceLog();
		}
		#--------------------------------------------------------------------

		if ( $overwriteLinksTable ) {
			$dbConn = $this->openAdminConnection();
			if ( !$dbConn ) {
				return;
			}
			# Check for existing links_backup, and delete it if it exists.
			$this->output( "Dropping backup links table if it exists..." );
			$dbConn->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
			$this->output( " done.\n" );

			# Swap in the new table, and move old links table to links_backup.
			# All names come from tableNamesN(), so the statement also matches
			# when the tables carry a prefix.
			$this->output( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
			$dbConn->query( "RENAME TABLE $links TO $links_backup, $links_temp TO $links", __METHOD__ );
			$this->output( " done.\n\n" );

			$dbConn->close();
			$this->output( "Conversion complete. The old table remains at $links_backup;\n" );
			$this->output( "delete at your leisure.\n" );
		} else {
			$this->output( "Conversion complete. The converted table is at $links_temp;\n" );
			$this->output( "the original links table is unchanged.\n" );
		}
	}

	/**
	 * Open a separate database connection using the admin credentials from
	 * $wgDBadminuser / $wgDBadminpassword.
	 *
	 * @return Database|null The connection, or null (after reporting the
	 *   failure through output()) when it could not be opened.
	 */
	private function openAdminConnection() {
		global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;

		$dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
		if ( !$dbConn->isOpen() ) {
			$this->output( "Opening connection to database failed.\n" );
			return null;
		}
		return $dbConn;
	}

	/**
	 * Drop links_temp if it exists and create it again, empty, with two
	 * unsigned integer columns (l_from, l_to). Keys are only added when
	 * $this->noKeys is false.
	 *
	 * @return bool True when the table was created, false when the admin
	 *   connection could not be opened.
	 */
	private function createTempTable() {
		$dbConn = $this->openAdminConnection();
		if ( !$dbConn ) {
			return false;
		}
		$links_temp = $dbConn->tableName( 'links_temp' );

		$this->output( "Dropping temporary links table if it exists..." );
		$dbConn->query( "DROP TABLE IF EXISTS $links_temp", __METHOD__ );
		$this->output( " done.\n" );

		$this->output( "Creating temporary links table..." );
		if ( $this->noKeys ) {
			$dbConn->query( "CREATE TABLE $links_temp ( " .
				"l_from int(8) unsigned NOT NULL default '0', " .
				"l_to int(8) unsigned NOT NULL default '0')", __METHOD__ );
		} else {
			$dbConn->query( "CREATE TABLE $links_temp ( " .
				"l_from int(8) unsigned NOT NULL default '0', " .
				"l_to int(8) unsigned NOT NULL default '0', " .
				"UNIQUE KEY l_from(l_from,l_to), " .
				"KEY (l_to))", __METHOD__ );
		}
		$this->output( " done.\n\n" );
		return true;
	}

	/**
	 * Append $text to the performance log. Does nothing unless logging is
	 * enabled and the log file is open.
	 *
	 * @param $text String: text to write, including any trailing newline
	 */
	private function performanceLog( $text ) {
		if ( $this->logPerformance && $this->perfLogHandle ) {
			fwrite( $this->perfLogHandle, $text );
		}
	}

	/**
	 * Close the performance log file if it is open.
	 */
	private function closePerformanceLog() {
		if ( $this->perfLogHandle ) {
			fclose( $this->perfLogHandle );
			$this->perfLogHandle = null;
		}
	}

	/**
	 * @return float Current Unix time in seconds, with microsecond accuracy
	 */
	private function getMicroTime() {
		return microtime( true );
	}
}
00241
# Name of the Maintenance subclass defined in this file; presumably picked up
# by the script that DO_MAINTENANCE points to -- confirm against Maintenance.php.
$maintClass = 'ConvertLinks';
require_once DO_MAINTENANCE;