MediaWiki
master
|
<?php
/**
 * Background job that re-runs the secondary data updates of a single page.
 *
 * run() looks up a revision for the job's title and hands it to
 * runForTitleInternal(), which executes the content's secondary data updates.
 */
class RefreshLinksJob extends Job {
	/**
	 * @param Title $title Page to refresh; forwarded to Job::__construct()
	 *   (run() later reads it back as $this->title)
	 * @param array|string $params Job parameters; run() honours an optional 'masterPos' entry
	 * @param int $id Forwarded unchanged to Job::__construct()
	 */
	function __construct( $title, $params = '', $id = 0 ) {
		parent::__construct( 'refreshLinks', $title, $params, $id );
		// This job is expensive, so ask for duplicate removal
		$this->removeDuplicates = true;
	}

	/**
	 * Refresh the page this job was created for.
	 *
	 * @return bool False (with $this->error set) when the title is null or no
	 *   revision could be loaded for it; true once the updates have been run
	 */
	function run() {
		wfProfileIn( __METHOD__ );

		LinkCache::singleton()->clear();

		$success = false;
		if ( is_null( $this->title ) ) {
			$this->error = "refreshLinks: Invalid title";
		} else {
			# Let the current/next slave DB handle catch up to the recorded master
			# position first, so that page_latest is correct for templates or files
			# that changed only milliseconds ago and triggered this job.
			if ( isset( $this->params['masterPos'] ) ) {
				wfGetLB()->waitFor( $this->params['masterPos'] );
			}

			$rev = Revision::newFromTitle( $this->title, false, Revision::READ_NORMAL );
			if ( $rev ) {
				self::runForTitleInternal( $this->title, $rev, __METHOD__ );
				$success = true;
			} else {
				// XXX: what if it was just deleted?
				$dbKey = $this->title->getPrefixedDBkey();
				$this->error = 'refreshLinks: Article not found "' . $dbKey . '"';
			}
		}

		wfProfileOut( __METHOD__ );
		return $success;
	}

	/**
	 * Run the secondary data updates produced by a revision's raw content.
	 *
	 * @param Title $title Title passed on to getSecondaryDataUpdates()
	 * @param Revision $revision Revision whose content supplies the updates
	 * @param string $fname Name used for the wfProfileIn()/wfProfileOut() pair
	 */
	public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
		wfProfileIn( $fname );

		$pageContent = $revision->getContent( Revision::RAW );
		if ( !$pageContent ) {
			// No content available: behave as though the page were empty
			$pageContent = $revision->getContentHandler()->makeEmptyContent();
		}

		DataUpdate::runUpdates( $pageContent->getSecondaryDataUpdates( $title, null, false ) );

		wfProfileOut( $fname );
	}
}

/**
 * Background job that refreshes the pages linking to a title, restricted to
 * the range given by the 'start' and 'end' parameters.
 *
 * Depending on how many pages are found and on the PHP SAPI, run() either
 * splits the work into smaller RefreshLinksJob2 jobs, queues one
 * RefreshLinksJob per page, or refreshes the pages directly.
 */
class RefreshLinksJob2 extends Job {
	/** Largest number of pages a single job refreshes itself */
	const MAX_TITLES_RUN = 10;

	/**
	 * @param Title $title Page whose backlinks are refreshed; forwarded to Job::__construct()
	 * @param array $params Needs 'start' and 'end'; 'table' and 'masterPos' are optional
	 * @param int $id Forwarded unchanged to Job::__construct()
	 */
	function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'refreshLinks2', $title, $params, $id );
	}

	/**
	 * Refresh, or queue the refresh of, the backlinks in the configured range.
	 *
	 * @return bool False (with $this->error set) when the title is null or the
	 *   'start'/'end' parameters are missing; true otherwise, even if
	 *   individual pages had to be skipped
	 */
	function run() {
		wfProfileIn( __METHOD__ );

		LinkCache::singleton()->clear();

		$invalid = null;
		if ( is_null( $this->title ) ) {
			$invalid = "refreshLinks2: Invalid title";
		} elseif ( !isset( $this->params['start'], $this->params['end'] ) ) {
			$invalid = "refreshLinks2: Invalid params";
		}
		if ( $invalid !== null ) {
			$this->error = $invalid;
			wfProfileOut( __METHOD__ );
			return false;
		}

		// Jobs queued before r94435 carry no 'table' parameter
		$table = 'templatelinks';
		if ( isset( $this->params['table'] ) ) {
			$table = $this->params['table'];
		}

		// Work out a master position to wait for, to avoid slave lag when
		// fetching templates
		$masterPos = false;
		if ( isset( $this->params['masterPos'] ) ) {
			$masterPos = $this->params['masterPos'];
		} elseif ( wfGetLB()->getServerCount() > 1 ) {
			$masterPos = wfGetLB()->getMasterPos();
		}

		$backlinks = $this->title->getBacklinkCache()->getLinks(
			$table, $this->params['start'], $this->params['end'] );

		if ( $backlinks->count() > self::MAX_TITLES_RUN ) {
			# Parsing too many pages in one job can starve other jobs, so split the
			# work into jobs of at most MAX_TITLES_RUN pages each. Handing the master
			# position down reduces the time the runners of those jobs spend waiting
			# for slaves, since time will have passed before they are popped off the
			# queue.
			$pageIds = array();
			foreach ( $backlinks as $backlink ) {
				$pageIds[] = $backlink->getArticleId();
			}

			$jobs = array();
			foreach ( array_chunk( $pageIds, self::MAX_TITLES_RUN ) as $batch ) {
				// Each sub-job covers the ID range from its first to its last page
				$rangeParams = array(
					'table' => $table,
					'start' => $batch[0],
					'end' => $batch[count( $batch ) - 1],
					'masterPos' => $masterPos
				);
				$jobs[] = new RefreshLinksJob2( $this->title, $rangeParams );
			}
			Job::batchInsert( $jobs );
		} elseif ( php_sapi_name() == 'cli' ) {
			# Let the current/next slave DB handle catch up to the master position
			# first, so that page_latest is correct for templates or files that
			# changed only milliseconds ago and triggered this job.
			if ( $masterPos ) {
				wfGetLB()->waitFor( $masterPos );
			}
			# Re-parse every page that transcludes this page and update its tracking links
			foreach ( $backlinks as $backlink ) {
				$rev = Revision::newFromTitle( $backlink, false, Revision::READ_NORMAL );
				if ( $rev ) {
					RefreshLinksJob::runForTitleInternal( $backlink, $rev, __METHOD__ );
					wfWaitForSlaves();
				} else {
					// Record the problem and move on to the next page
					$dbKey = $backlink->getPrefixedDBkey();
					$this->error = 'refreshLinks: Article not found "' . $dbKey . '"';
				}
			}
		} else {
			# Not suitable for job running triggered by a page load!
			# Gracefully fall back to one refreshLinks job per page instead.
			$jobs = array();
			foreach ( $backlinks as $backlink ) {
				$jobs[] = new RefreshLinksJob( $backlink, array( 'masterPos' => $masterPos ) );
			}
			Job::batchInsert( $jobs );
		}

		wfProfileOut( __METHOD__ );
		return true;
	}
}