MediaWiki
master
|
<?php
require_once( __DIR__ . '/Maintenance.php' );

/**
 * Maintenance script that refreshes the link tables.
 *
 * Unless 'dfn-only' is given, it walks over pages and rebuilds their redirect
 * and/or link table entries; afterwards it always removes link table rows
 * whose source page no longer exists.
 */
class RefreshLinks extends Maintenance {
	/**
	 * Register the script description, its options, its single optional
	 * argument and the default batch size (100).
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Refresh link tables";
		$this->addOption( 'dfn-only', 'Delete links from nonexistent articles only' );
		$this->addOption( 'new-only', 'Only affect articles with just a single edit' );
		$this->addOption( 'redirects-only', 'Only fix redirects, not all links' );
		$this->addOption( 'old-redirects-only', 'Only fix redirects with no redirect table entry' );
		$this->addOption( 'm', 'Maximum replication lag', false, true );
		$this->addOption( 'e', 'Last page id to refresh', false, true );
		$this->addArg( 'start', 'Page_id to start from, default 1', false );
		$this->setBatchSize( 100 );
	}

	/**
	 * Entry point: run the refresh pass (skipped when 'dfn-only' is set) and
	 * then delete link rows that point from nonexistent pages.
	 */
	public function execute() {
		$max = $this->getOption( 'm', 0 );
		if ( !$this->hasOption( 'dfn-only' ) ) {
			$start = $this->getArg( 0, 1 );
			$new = $this->getOption( 'new-only', false );
			$end = $this->getOption( 'e', 0 );
			$redir = $this->getOption( 'redirects-only', false );
			$oldRedir = $this->getOption( 'old-redirects-only', false );
			$this->doRefreshLinks( $start, $new, $max, $end, $redir, $oldRedir );
		}
		$this->deleteLinksFromNonexistent( $max, $this->mBatchSize );
	}

	/**
	 * Do the actual link refreshing.
	 *
	 * Exactly one of three code paths runs:
	 *  - $oldRedirectsOnly: fix redirect pages that have no redirect table row,
	 *    limited to page_id >= $start (and <= $end when $end is non-zero);
	 *  - $newOnly: process pages with page_is_new = 1 and page_id >= $start
	 *    ($end is not applied on this path);
	 *  - otherwise: iterate every id from $start to $end, fixing redirects and,
	 *    unless $redirectsOnly is set, links as well.
	 *
	 * @param int|string $start Page_id to start from
	 * @param bool $newOnly Only process pages flagged as new
	 * @param int|bool $maxLag Accepted for interface compatibility; not
	 *   referenced anywhere in this method
	 * @param int|string $end Last page_id to process; 0 means "no explicit end"
	 * @param bool $redirectsOnly Only fix redirects, not all links
	 * @param bool $oldRedirectsOnly Only fix redirects lacking a redirect table entry
	 */
	private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
		$end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
		global $wgParser, $wgUseTidy;

		$reportingInterval = 100;
		$dbr = wfGetDB( DB_SLAVE );
		$start = intval( $start );
		// $end comes straight from the command line and is interpolated into
		// a raw SQL condition below, so normalise it the same way as $start.
		$end = intval( $end );

		// Give extensions a chance to optimize settings
		wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );

		# Don't generate extension images (e.g. Timeline)
		$wgParser->clearTagHooks();

		# Don't use HTML tidy
		$wgUseTidy = false;

		$what = $redirectsOnly ? "redirects" : "links";

		if ( $oldRedirectsOnly ) {
			# This entire code path is cut-and-pasted from below. Hurrah.

			$conds = array(
				"page_is_redirect=1",
				"rd_from IS NULL"
			);

			if ( $end == 0 ) {
				$conds[] = "page_id >= $start";
			} else {
				$conds[] = "page_id BETWEEN $start AND $end";
			}

			// Redirect pages with no matching row in the redirect table
			$res = $dbr->select(
				array( 'page', 'redirect' ),
				'page_id',
				$conds,
				__METHOD__,
				array(),
				array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
			);
			$num = $dbr->numRows( $res );
			$this->output( "Refreshing $num old redirects from $start...\n" );

			$i = 0;

			foreach ( $res as $row ) {
				// Report progress and wait for slaves every $reportingInterval rows
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $row->page_id );
			}
		} elseif ( $newOnly ) {
			$this->output( "Refreshing $what from " );
			$res = $dbr->select( 'page',
				array( 'page_id' ),
				array(
					'page_is_new' => 1,
					"page_id >= $start" ),
				__METHOD__
			);
			$num = $dbr->numRows( $res );
			$this->output( "$num new articles...\n" );

			$i = 0;
			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				if ( $redirectsOnly ) {
					$this->fixRedirect( $row->page_id );
				} else {
					self::fixLinksFromArticle( $row->page_id );
				}
			}
		} else {
			if ( !$end ) {
				// No explicit end: cover every id known to either table
				$maxPage = $dbr->selectField( 'page', 'max(page_id)', false );
				$maxRD = $dbr->selectField( 'redirect', 'max(rd_from)', false );
				$end = max( $maxPage, $maxRD );
			}
			$this->output( "Refreshing redirects table.\n" );
			$this->output( "Starting from page_id $start of $end.\n" );

			for ( $id = $start; $id <= $end; $id++ ) {

				if ( !( $id % $reportingInterval ) ) {
					$this->output( "$id\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $id );
			}

			if ( !$redirectsOnly ) {
				$this->output( "Refreshing links table.\n" );
				$this->output( "Starting from page_id $start of $end.\n" );

				for ( $id = $start; $id <= $end; $id++ ) {

					if ( !( $id % $reportingInterval ) ) {
						$this->output( "$id\n" );
						wfWaitForSlaves();
					}
					self::fixLinksFromArticle( $id );
				}
			}
		}
	}

	/**
	 * Update the redirect table entry for one page id.
	 *
	 * Deletes the redirect row when the page does not exist or when
	 * getRedirectTarget() reports that it is not a redirect.
	 * NOTE(review): presumably WikiPage::getRedirectTarget() itself (re)creates
	 * the redirect row when the page is a redirect — confirm against WikiPage.
	 *
	 * @param int $id The page_id to fix
	 */
	private function fixRedirect( $id ) {
		$page = WikiPage::newFromID( $id );
		$dbw = wfGetDB( DB_MASTER );

		if ( $page === null ) {
			// This page doesn't exist (any more)
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );
			return;
		}

		$rt = $page->getRedirectTarget();

		if ( $rt === null ) {
			// The page is not a redirect
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );
		}
	}

	/**
	 * Run the secondary data updates for the raw content of one page.
	 *
	 * Clears the LinkCache singleton first, then does nothing further if the
	 * page or its content cannot be loaded. The updates run between a
	 * begin()/commit() pair on the master connection.
	 *
	 * @param int $id The page_id whose links should be refreshed
	 */
	public static function fixLinksFromArticle( $id ) {
		$page = WikiPage::newFromID( $id );

		LinkCache::singleton()->clear();

		if ( $page === null ) {
			return;
		}

		$content = $page->getContent( Revision::RAW );
		// Bail out when there is no content; the previous guard compared two
		// constants and could never fire, leading to a method call on null.
		if ( $content === null ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$dbw->begin( __METHOD__ );

		$updates = $content->getSecondaryDataUpdates( $page->getTitle() );
		DataUpdate::runUpdates( $updates );

		$dbw->commit( __METHOD__ );
	}

	/**
	 * Remove rows from the link tables whose source page id has no matching
	 * row in the page table.
	 *
	 * For every table in the list below, the orphaned ids are read through an
	 * unbuffered connection from a freshly created load balancer and deleted
	 * on the master in chunks of $batchSize, waiting for slaves between chunks.
	 *
	 * @param int $maxLag Accepted for interface compatibility; not referenced
	 *   anywhere in this method
	 * @param int $batchSize Number of ids to collect before issuing a delete
	 */
	private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
		wfWaitForSlaves();

		$dbw = wfGetDB( DB_MASTER );

		// Dedicated load balancer / connection for the unbuffered reads;
		// it is closed again at the end of this method.
		$lb = wfGetLBFactory()->newMainLB();
		$dbr = $lb->getConnection( DB_SLAVE );
		$dbr->bufferResults( false );

		$linksTables = array( // table name => page_id field
			'pagelinks' => 'pl_from',
			'imagelinks' => 'il_from',
			'categorylinks' => 'cl_from',
			'templatelinks' => 'tl_from',
			'externallinks' => 'el_from',
			'iwlinks' => 'iwl_from',
			'langlinks' => 'll_from',
			'redirect' => 'rd_from',
			'page_props' => 'pp_page',
		);

		foreach ( $linksTables as $table => $field ) {
			$this->output( "Retrieving illegal entries from $table... " );

			// SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
			$results = $dbr->select( array( $table, 'page' ),
				$field,
				array( 'page_id' => null ),
				__METHOD__,
				'DISTINCT',
				array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
			);

			$counter = 0;
			$list = array();
			$this->output( "0.." );
			foreach ( $results as $row ) {
				$counter++;
				$list[] = $row->$field;
				// Flush a full batch
				if ( ( $counter % $batchSize ) == 0 ) {
					wfWaitForSlaves();
					$dbw->delete( $table, array( $field => $list ), __METHOD__ );

					$this->output( $counter . ".." );
					$list = array();
				}
			}
			$this->output( $counter );
			// Flush whatever is left over from the last partial batch
			if ( count( $list ) > 0 ) {
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );
			}
			$this->output( "\n" );
			wfWaitForSlaves();
		}
		$lb->closeAll();
	}
}

$maintClass = 'RefreshLinks';
require_once( RUN_MAINTENANCE_IF_MAIN );