MediaWiki  master
RefreshLinksJob.php
Go to the documentation of this file.
00001 <?php
/**
 * Job that re-runs the secondary data updates (link tracking and similar)
 * for the latest revision of a single page.
 */
class RefreshLinksJob extends Job {
	/**
	 * @param mixed $title Page to refresh; handed to the parent Job constructor
	 *   (run() later reads it back as $this->title)
	 * @param mixed $params Job parameters; run() honours an optional 'masterPos' entry
	 * @param int $id Job ID, forwarded unchanged to the parent constructor
	 */
	public function __construct( $title, $params = '', $id = 0 ) {
		parent::__construct( 'refreshLinks', $title, $params, $id );
		$this->removeDuplicates = true; // job is expensive
	}

	/**
	 * Run the secondary data updates for the current revision of $this->title.
	 *
	 * @return bool False (with $this->error set) if the title is null or no
	 *   revision could be loaded for it, true otherwise
	 */
	public function run() {
		wfProfileIn( __METHOD__ );

		LinkCache::singleton()->clear();

		if ( $this->title === null ) {
			$this->error = "refreshLinks: Invalid title";
			wfProfileOut( __METHOD__ );
			return false;
		}

		# Wait for the DB of the current/next slave DB handle to catch up to the master.
		# This way, we get the correct page_latest for templates or files that just changed
		# milliseconds ago, having triggered this job to begin with.
		# Use !empty() rather than isset(): RefreshLinksJob2 passes 'masterPos' => false
		# when there is only one server, and isset() would treat that as a real position.
		if ( !empty( $this->params['masterPos'] ) ) {
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}

		$revision = Revision::newFromTitle( $this->title, false, Revision::READ_NORMAL );
		if ( !$revision ) {
			$this->error = 'refreshLinks: Article not found "' .
				$this->title->getPrefixedDBkey() . '"';
			wfProfileOut( __METHOD__ );
			return false; // XXX: what if it was just deleted?
		}

		self::runForTitleInternal( $this->title, $revision, __METHOD__ );

		wfProfileOut( __METHOD__ );
		return true;
	}

	/**
	 * Build and run the secondary data updates for one revision of a page.
	 *
	 * @param Title $title Page the updates are generated for
	 * @param Revision $revision Revision whose raw content is used; if it has
	 *   no content, empty content from its content handler is used instead
	 * @param string $fname Name passed to wfProfileIn()/wfProfileOut() for this section
	 */
	public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
		wfProfileIn( $fname );

		$content = $revision->getContent( Revision::RAW );
		if ( !$content ) {
			// if there is no content, pretend the content is empty
			$content = $revision->getContentHandler()->makeEmptyContent();
		}

		DataUpdate::runUpdates( $content->getSecondaryDataUpdates( $title, null, false ) );

		wfProfileOut( $fname );
	}
}
00086 
/**
 * Job that refreshes the pages linking to a title through a given link table,
 * restricted to the 'start'..'end' range given in the job parameters.
 * Large ranges are split into smaller jobs of the same type.
 */
class RefreshLinksJob2 extends Job {
	/** Maximum number of backlinking pages a single job will process itself */
	const MAX_TITLES_RUN = 10;

	/**
	 * @param mixed $title Page whose backlinks should be refreshed; handed to
	 *   the parent Job constructor (run() later reads it back as $this->title)
	 * @param array $params Job parameters: 'start' and 'end' are required by run();
	 *   'table' (defaults to 'templatelinks') and 'masterPos' are optional
	 * @param int $id Job ID, forwarded unchanged to the parent constructor
	 */
	public function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'refreshLinks2', $title, $params, $id );
	}

	/**
	 * Process the backlinks in the configured range, or re-queue them as
	 * smaller jobs when there are too many or when not running from the CLI.
	 *
	 * @return bool False (with $this->error set) if the title is null or the
	 *   'start'/'end' parameters are missing, true otherwise
	 */
	public function run() {
		wfProfileIn( __METHOD__ );

		LinkCache::singleton()->clear();

		if ( $this->title === null ) {
			$this->error = "refreshLinks2: Invalid title";
			wfProfileOut( __METHOD__ );
			return false;
		}
		if ( !isset( $this->params['start'] ) || !isset( $this->params['end'] ) ) {
			$this->error = "refreshLinks2: Invalid params";
			wfProfileOut( __METHOD__ );
			return false;
		}

		// Back compat for pre-r94435 jobs
		$table = isset( $this->params['table'] ) ? $this->params['table'] : 'templatelinks';

		// Avoid slave lag when fetching templates
		if ( isset( $this->params['masterPos'] ) ) {
			$masterPos = $this->params['masterPos'];
		} elseif ( wfGetLB()->getServerCount() > 1 ) {
			$masterPos = wfGetLB()->getMasterPos();
		} else {
			$masterPos = false;
		}

		$titles = $this->title->getBacklinkCache()->getLinks(
			$table, $this->params['start'], $this->params['end'] );

		if ( $titles->count() > self::MAX_TITLES_RUN ) {
			# We don't want to parse too many pages per job as it can starve other jobs.
			# If there are too many pages to parse, break this up into smaller jobs. By passing
			# in the master position here we can cut down on the time spent waiting for slaves to
			# catch up by the runners handling these jobs since time will have passed between now
			# and when they pop these jobs off the queue.
			$jobs  = array();
			$start = 0; // article ID of the first page in the current batch
			$end   = 0; // article ID of the last page seen in the current batch
			$bsize = 0; // number of pages in the current batch
			foreach ( $titles as $title ) {
				$articleId = $title->getArticleId();
				if ( $bsize === 0 ) {
					$start = $articleId; // first page of a new batch
				}
				$end = $articleId;
				if ( ++$bsize >= self::MAX_TITLES_RUN ) {
					$jobs[] = $this->newRangeJob( $table, $start, $end, $masterPos );
					$bsize = 0; // next page starts a fresh batch
				}
			}
			if ( $bsize > 0 ) { // group remaining pages into a job
				$jobs[] = $this->newRangeJob( $table, $start, $end, $masterPos );
			}
			Job::batchInsert( $jobs );
		} elseif ( php_sapi_name() != 'cli' ) {
			# Not suitable for page load triggered job running!
			# Gracefully switch to refreshLinks jobs if this happens.
			$jobs = array();
			foreach ( $titles as $title ) {
				$jobs[] = new RefreshLinksJob( $title, array( 'masterPos' => $masterPos ) );
			}
			Job::batchInsert( $jobs );
		} else {
			# Wait for the DB of the current/next slave DB handle to catch up to the master.
			# This way, we get the correct page_latest for templates or files that just changed
			# milliseconds ago, having triggered this job to begin with.
			if ( $masterPos ) {
				wfGetLB()->waitFor( $masterPos );
			}
			# Re-parse each page that transcludes this page and update their tracking links...
			foreach ( $titles as $title ) {
				$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
				if ( !$revision ) {
					// Record the problem but keep going; the job still reports success.
					$this->error = 'refreshLinks2: Article not found "' .
						$title->getPrefixedDBkey() . '"';
					continue; // skip this page
				}
				RefreshLinksJob::runForTitleInternal( $title, $revision, __METHOD__ );
				wfWaitForSlaves();
			}
		}

		wfProfileOut( __METHOD__ );
		return true;
	}

	/**
	 * Create a child job of this type for the same title covering one sub-range.
	 *
	 * @param string $table Link table name to pass on as the 'table' parameter
	 * @param int $start Article ID used as the 'start' parameter
	 * @param int $end Article ID used as the 'end' parameter
	 * @param mixed $masterPos Master position (or false) passed on as 'masterPos'
	 * @return RefreshLinksJob2
	 */
	private function newRangeJob( $table, $start, $end, $masterPos ) {
		return new RefreshLinksJob2( $this->title, array(
			'table'     => $table,
			'start'     => $start,
			'end'       => $end,
			'masterPos' => $masterPos
		) );
	}
}