MediaWiki  master
Import.php
Go to the documentation of this file.
00001 <?php
00033 class WikiImporter {
00034         private $reader = null;
00035         private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
00036         private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
00037         private $mNoticeCallback, $mDebug;
00038         private $mImportUploads, $mImageBasePath;
00039         private $mNoUpdates = false;
00040 
/**
 * Creates an importer that reads XML from the given source.
 *
 * The source is registered with UploadSourceAdapter and then opened by
 * the XML reader through the "uploadsource://" stream wrapper. The
 * default revision, upload, log item and page-out callbacks are installed.
 *
 * @param mixed $source Source handed to UploadSourceAdapter::registerSource()
 */
function __construct( $source ) {
        $this->reader = new XMLReader();

        // Registering a protocol twice makes PHP raise a warning, so only
        // register the wrapper if no earlier importer has done so already.
        if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
                stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
        }
        $id = UploadSourceAdapter::registerSource( $source );
        if ( defined( 'LIBXML_PARSEHUGE' ) ) {
                // Only pass the flag when this libxml build defines it
                $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
        } else {
                $this->reader->open( "uploadsource://$id" );
        }

        // Default callbacks
        $this->setRevisionCallback( array( $this, "importRevision" ) );
        $this->setUploadCallback( array( $this, 'importUpload' ) );
        $this->setLogItemCallback( array( $this, 'importLogItem' ) );
        $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
}
00062 
/**
 * Reports an XML error to the importer debug channel and to wfDebug().
 * Despite the name, nothing is thrown here.
 *
 * @param string $err Error description
 */
private function throwXmlError( $err ) {
        $failure = "FAILURE: $err";
        $this->debug( $failure );

        $logLine = "WikiImporter XML error: $err\n";
        wfDebug( $logLine );
}
00067 
/**
 * Writes a line to the debug log, but only while debug mode is on.
 *
 * @param string $data Text to log
 */
private function debug( $data ) {
        if ( !$this->mDebug ) {
                return;
        }

        wfDebug( "IMPORT: $data\n" );
}
00073 
/**
 * Writes a warning line to the debug log regardless of debug mode.
 *
 * @param string $data Text to log
 */
private function warn( $data ) {
        $line = "IMPORT: $data\n";
        wfDebug( $line );
}
00077 
/**
 * Emits a notice about the import.
 *
 * When a callable notice callback is set it receives the message key and
 * the list of parameters; otherwise the rendered message text is echoed
 * followed by a newline.
 *
 * @param string $msg Message key; any further arguments are message parameters
 */
private function notice( $msg /*, $param, ...*/ ) {
        // Everything after the message key is a message parameter
        $params = array_slice( func_get_args(), 1 );

        if ( !is_callable( $this->mNoticeCallback ) ) {
                # No ImportReporter -> CLI
                echo wfMessage( $msg, $params )->text() . "\n";
                return;
        }

        call_user_func( $this->mNoticeCallback, $msg, $params );
}
00088 
/**
 * Turns debug logging (see debug()) on or off.
 *
 * @param bool $debug
 */
public function setDebug( $debug ) {
        $this->mDebug = $debug;
}
00096 
/**
 * Stores the "no updates" flag that is later copied onto every
 * WikiRevision built by this importer.
 *
 * @param bool $noupdates
 */
public function setNoUpdates( $noupdates ) {
        $this->mNoUpdates = $noupdates;
}
00104 
/**
 * Sets the callback used by notice().
 *
 * The assignment is delegated to wfSetVar() and its return value is
 * passed through (presumably the previous callback; confirm against
 * wfSetVar()).
 *
 * @param callable $callback
 * @return mixed Whatever wfSetVar() returns
 */
public function setNoticeCallback( $callback ) {
        $result = wfSetVar( $this->mNoticeCallback, $callback );

        return $result;
}
00114 
/**
 * Replaces the callback invoked by pageCallback().
 *
 * @param callable $callback
 * @return callable|null The callback that was set before
 */
public function setPageCallback( $callback ) {
        $old = $this->mPageCallback;
        $this->mPageCallback = $callback;

        return $old;
}
00125 
/**
 * Replaces the callback invoked by pageOutCallback() once a page has
 * been read completely.
 *
 * @param callable $callback
 * @return callable|null The callback that was set before
 */
public function setPageOutCallback( $callback ) {
        $old = $this->mPageOutCallback;
        $this->mPageOutCallback = $callback;

        return $old;
}
00140 
/**
 * Replaces the callback invoked by revisionCallback().
 *
 * @param callable $callback
 * @return callable|null The callback that was set before
 */
public function setRevisionCallback( $callback ) {
        $old = $this->mRevisionCallback;
        $this->mRevisionCallback = $callback;

        return $old;
}
00151 
/**
 * Replaces the callback invoked by processUpload().
 *
 * @param callable $callback
 * @return callable|null The callback that was set before
 */
public function setUploadCallback( $callback ) {
        $old = $this->mUploadCallback;
        $this->mUploadCallback = $callback;

        return $old;
}
00162 
/**
 * Replaces the callback invoked by logItemCallback().
 *
 * @param callable $callback
 * @return callable|null The callback that was set before
 */
public function setLogItemCallback( $callback ) {
        $old = $this->mLogItemCallback;
        $this->mLogItemCallback = $callback;

        return $old;
}
00173 
/**
 * Replaces the site info callback.
 *
 * Note that handleSiteInfo() throws when this callback is set.
 *
 * @param callable $callback
 * @return callable|null The callback that was set before
 */
public function setSiteInfoCallback( $callback ) {
        $old = $this->mSiteInfoCallback;
        $this->mSiteInfoCallback = $callback;

        return $old;
}
00184 
/**
 * Sets the namespace that all imported pages are moved into.
 *
 * @param null|int $namespace Null to keep the namespaces from the dump,
 *   otherwise a non-negative namespace index
 * @return bool True when the value was accepted, false when it was
 *   rejected (negative namespace); the stored value is unchanged then
 */
public function setTargetNamespace( $namespace ) {
        if ( is_null( $namespace ) ) {
                // Don't override namespaces
                $this->mTargetNamespace = null;
                return true;
        }

        if ( $namespace >= 0 ) {
                // @todo FIXME: Check for validity
                $this->mTargetNamespace = intval( $namespace );
                return true;
        }

        return false;
}
00201 
/**
 * Sets the root page under which imported pages are placed as subpages.
 *
 * Null clears the root page; an empty string leaves the current setting
 * alone. Otherwise the text must resolve to a local title in a namespace
 * that has subpages; on success the target namespace is reset to null.
 *
 * @param null|string $rootpage
 * @return Status Good, or fatal with 'import-rootpage-invalid' /
 *   'import-rootpage-nosubpage'
 */
public function setTargetRootPage( $rootpage ) {
        global $wgContLang;

        $status = Status::newGood();

        if ( is_null( $rootpage ) ) {
                // No rootpage
                $this->mTargetRootPage = null;
                return $status;
        }
        if ( $rootpage === '' ) {
                // Nothing to change
                return $status;
        }

        // Avoid double slashes when page names get appended later
        $rootpage = rtrim( $rootpage, '/' );
        $defaultNs = is_null( $this->mTargetNamespace ) ? NS_MAIN : $this->mTargetNamespace;
        $title = Title::newFromText( $rootpage, $defaultNs );

        if ( !$title || $title->isExternal() ) {
                $status->fatal( 'import-rootpage-invalid' );
                return $status;
        }

        if ( MWNamespace::hasSubpages( $title->getNamespace() ) ) {
                // Clear the target namespace so that the namespace check in
                // processTitle() does not get in the way
                $this->setTargetNamespace( null );
                $this->mTargetRootPage = $title->getPrefixedDBKey();
                return $status;
        }

        // The namespace cannot hold subpages: report it by its display name
        if ( $title->getNamespace() == NS_MAIN ) {
                $displayNSText = wfMessage( 'blanknamespace' )->text();
        } else {
                $displayNSText = $wgContLang->getNsText( $title->getNamespace() );
        }
        $status->fatal( 'import-rootpage-nosubpage', $displayNSText );

        return $status;
}
00234 
/**
 * Sets the directory that handleUpload() combines with an upload's
 * "rel" value to look for a local copy of the file.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
        $this->mImageBasePath = $dir;
}
00241 
/**
 * Sets whether handleUpload() passes uploads on to processUpload().
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
        $this->mImportUploads = $import;
}
00248 
/**
 * Default revision callback: runs the revision's importOldRevision()
 * through the master database's deadlockLoop().
 *
 * @param WikiRevision $revision
 * @return mixed Result of deadlockLoop()
 */
public function importRevision( $revision ) {
        $master = wfGetDB( DB_MASTER );
        $task = array( $revision, 'importOldRevision' );

        return $master->deadlockLoop( $task );
}
00258 
/**
 * Default log item callback: runs the item's importLogItem() through
 * the master database's deadlockLoop().
 *
 * @param WikiRevision $rev
 * @return mixed Result of deadlockLoop()
 */
public function importLogItem( $rev ) {
        $master = wfGetDB( DB_MASTER );
        $task = array( $rev, 'importLogItem' );

        return $master->deadlockLoop( $task );
}
00268 
/**
 * Default upload callback: runs the revision's importUpload() through
 * the master database's deadlockLoop().
 *
 * @param WikiRevision $revision
 * @return mixed Result of deadlockLoop()
 */
public function importUpload( $revision ) {
        $master = wfGetDB( DB_MASTER );
        $task = array( $revision, 'importUpload' );

        return $master->deadlockLoop( $task );
}
00278 
/**
 * Default page-out callback: forwards every argument it received to the
 * 'AfterImportPage' hook.
 *
 * @param mixed $title Title of the imported page
 * @param mixed $origTitle Title as found in the dump
 * @param int $revCount Number of revisions seen
 * @param int $sRevCount Number of revisions imported successfully
 * @param array $pageInfo Collected page data
 * @return mixed Result of wfRunHooks()
 */
public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
        return wfRunHooks( 'AfterImportPage', func_get_args() );
}
00292 
/**
 * Dumps the main fields of a revision to the debug log.
 *
 * @param object $revision Object exposing title, user_text, timestamp,
 *   comment and text properties
 */
public function debugRevisionHandler( &$revision ) {
        $this->debug( "Got revision:" );

        // The title may be something other than an object if it was invalid
        $titleLine = is_object( $revision->title )
                ? "-- Title: " . $revision->title->getPrefixedText()
                : "-- Title: <invalid>";
        $this->debug( $titleLine );

        $this->debug( "-- User: " . $revision->user_text );
        $this->debug( "-- Timestamp: " . $revision->timestamp );
        $this->debug( "-- Comment: " . $revision->comment );
        $this->debug( "-- Text: " . $revision->text );
}
00309 
/**
 * Invokes the page callback, if one is set, with the given title data.
 *
 * @param mixed $title Value produced by processTitle()
 */
public function pageCallback( $title ) {
        if ( !isset( $this->mPageCallback ) ) {
                return;
        }

        call_user_func( $this->mPageCallback, $title );
}
00319 
/**
 * Invokes the page-out callback, if one is set, forwarding all arguments.
 *
 * @param mixed $title Title of the imported page
 * @param mixed $origTitle Title as found in the dump
 * @param int $revCount Number of revisions seen
 * @param int $sucCount Number of revisions imported successfully
 * @param array $pageInfo Collected page data
 */
private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
        if ( !isset( $this->mPageOutCallback ) ) {
                return;
        }

        call_user_func_array( $this->mPageOutCallback, func_get_args() );
}
00334 
/**
 * Passes a revision (and this importer) to the revision callback.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's result, or false when no callback is set
 */
private function revisionCallback( $revision ) {
        if ( !isset( $this->mRevisionCallback ) ) {
                return false;
        }

        $callbackArgs = array( $revision, $this );
        return call_user_func_array( $this->mRevisionCallback, $callbackArgs );
}
00348 
/**
 * Passes a log item (and this importer) to the log item callback.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's result, or false when no callback is set
 */
private function logItemCallback( $revision ) {
        if ( !isset( $this->mLogItemCallback ) ) {
                return false;
        }

        $callbackArgs = array( $revision, $this );
        return call_user_func_array( $this->mLogItemCallback, $callbackArgs );
}
00362 
/**
 * Returns the character data of the element the reader is positioned on.
 *
 * Reads forward until the next end-element node, concatenating text,
 * CDATA and significant-whitespace nodes on the way. An empty element
 * yields "" without advancing. If the input ends before an end element
 * is seen, the reader is closed and "" is returned.
 *
 * @return string
 */
private function nodeContents() {
        if ( $this->reader->isEmptyElement ) {
                return "";
        }

        $buffer = "";
        while ( $this->reader->read() ) {
                switch ( $this->reader->nodeType ) {
                case XMLReader::TEXT:
                case XMLReader::CDATA: // CDATA sections carry content as well
                case XMLReader::SIGNIFICANT_WHITESPACE:
                        $buffer .= $this->reader->value;
                        break;
                case XMLReader::END_ELEMENT:
                        return $buffer;
                }
        }

        // Ran out of input before the element was closed
        $this->reader->close();
        return '';
}
00389 
00390         # --------------
00391 
/**
 * Debugging aid: dumps the symbolic type, name and value of the node the
 * reader is currently positioned on, followed by two newlines.
 */
private function dumpElement() {
        // Map of XMLReader node type values to constant names, built once
        static $lookup = null;

        if ( !$lookup ) {
                $lookup = array();
                $constantNames = array(
                        "NONE",
                        "ELEMENT",
                        "ATTRIBUTE",
                        "TEXT",
                        "CDATA",
                        "ENTITY_REF",
                        "ENTITY",
                        "PI",
                        "COMMENT",
                        "DOC",
                        "DOC_TYPE",
                        "DOC_FRAGMENT",
                        "NOTATION",
                        "WHITESPACE",
                        "SIGNIFICANT_WHITESPACE",
                        "END_ELEMENT",
                        "END_ENTITY",
                        "XML_DECLARATION",
                );
                foreach ( $constantNames as $name ) {
                        $value = constant( "XmlReader::$name" );
                        $lookup[$value] = $name;
                }
        }

        // var_dump() writes directly to the output, the separator follows it
        var_dump(
                $lookup[$this->reader->nodeType],
                $this->reader->name,
                $this->reader->value
        );
        print( "\n\n" );
}
00429 
/**
 * Main entry point: reads the dump and dispatches every top-level tag.
 *
 * The first node must be <mediawiki>. After that, each node is offered
 * to the 'ImportHandleToplevelXMLTag' hook; when the hook does not
 * abort, <siteinfo>, <page> and <logitem> go to their handlers and any
 * other non-text tag is logged and skipped together with its subtree.
 *
 * @throws MWException If the document does not start with <mediawiki>
 * @return bool Always true once the loop has finished
 */
public function doImport() {
        $this->reader->read();

        if ( $this->reader->name != 'mediawiki' ) {
                throw new MWException( "Expected <mediawiki> tag, got ".
                        $this->reader->name );
        }
        $this->debug( "<mediawiki> tag is correct." );

        $this->debug( "Starting primary dump processing loop." );

        $keepReading = $this->reader->read();
        $skip = false;
        while ( $keepReading ) {
                $tag = $this->reader->name;
                $type = $this->reader->nodeType;

                // Hook arguments have to be passed as an array
                if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
                        // Do nothing
                } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
                        break;
                } elseif ( $tag == 'siteinfo' ) {
                        $this->handleSiteInfo();
                } elseif ( $tag == 'page' ) {
                        $this->handlePage();
                } elseif ( $tag == 'logitem' ) {
                        $this->handleLogItem();
                } elseif ( $tag != '#text' ) {
                        $this->warn( "Unhandled top-level XML tag $tag" );

                        $skip = true;
                }

                if ( $skip ) {
                        // next() jumps over the subtree of the unhandled tag
                        $keepReading = $this->reader->next();
                        $skip = false;
                        $this->debug( "Skip" );
                } else {
                        $keepReading = $this->reader->read();
                }
        }

        return true;
}
00479 
/**
 * Handles the <siteinfo> tag.
 *
 * Site info is not used for dump imports, so without a site info
 * callback the whole element is skipped. Setting such a callback is
 * not supported and results in an exception.
 *
 * @throws MWException If a site info callback has been set
 * @return bool True when the element was skipped
 */
private function handleSiteInfo() {
        if ( $this->mSiteInfoCallback ) {
                throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
        }

        // Quick short-circuit: jump past the element without parsing it
        $this->reader->next();
        return true;
}
00493 
/**
 * Reads a <logitem> element and hands the collected data to
 * processLogItem().
 *
 * Every child node is first offered to the 'ImportHandleLogItemXMLTag'
 * hook; when the hook does not abort, known scalar fields are stored by
 * tag name and <contributor> is parsed by handleContributor().
 */
private function handleLogItem() {
        $this->debug( "Enter log item handler." );
        $logInfo = array();

        // Fields that can just be stuffed in the logInfo array
        $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
                                'logtitle', 'params' );

        while ( $this->reader->read() ) {
                if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
                                $this->reader->name == 'logitem' ) {
                        break;
                }

                $tag = $this->reader->name;

                // Hook arguments have to be passed as an array
                if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
                                        array( $this, $logInfo ) ) ) {
                        // Do nothing
                } elseif ( in_array( $tag, $normalFields ) ) {
                        $logInfo[$tag] = $this->nodeContents();
                } elseif ( $tag == 'contributor' ) {
                        $logInfo['contributor'] = $this->handleContributor();
                } elseif ( $tag != '#text' ) {
                        $this->warn( "Unhandled log-item XML tag $tag" );
                }
        }

        $this->processLogItem( $logInfo );
}
00524 
/**
 * Builds a WikiRevision from parsed log item data and passes it to the
 * log item callback.
 *
 * @param array $logInfo Data collected by handleLogItem(); 'id', 'type',
 *   'action', 'timestamp', 'params' and 'logtitle' are read unconditionally
 * @return mixed Result of logItemCallback()
 */
private function processLogItem( $logInfo ) {
        $item = new WikiRevision;

        $item->setID( $logInfo['id'] );
        $item->setType( $logInfo['type'] );
        $item->setAction( $logInfo['action'] );
        $item->setTimestamp( $logInfo['timestamp'] );
        $item->setParams( $logInfo['params'] );

        $logTitle = Title::newFromText( $logInfo['logtitle'] );
        $item->setTitle( $logTitle );
        $item->setNoUpdates( $this->mNoUpdates );

        // The remaining fields are optional
        if ( isset( $logInfo['comment'] ) ) {
                $item->setComment( $logInfo['comment'] );
        }

        $contributor = isset( $logInfo['contributor'] ) ? $logInfo['contributor'] : array();
        if ( isset( $contributor['ip'] ) ) {
                $item->setUserIP( $contributor['ip'] );
        }
        if ( isset( $contributor['username'] ) ) {
                $item->setUserName( $contributor['username'] );
        }

        return $this->logItemCallback( $item );
}
00553 
/**
 * Reads a <page> element: collects the page fields, dispatches
 * revisions and uploads, and finally reports the page through
 * pageOutCallback().
 *
 * When processTitle() rejects the title, the rest of the page is
 * skipped and neither the page callback nor the page-out callback is
 * invoked, since there is no title to report.
 */
private function handlePage() {
        // Handle page data.
        $this->debug( "Enter page handler." );
        $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

        // Fields that can just be stuffed in the pageInfo object
        $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

        $skip = false;
        $badTitle = false;
        $origTitle = null;

        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
                if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
                                $this->reader->name == 'page' ) {
                        break;
                }

                // A skip request only applies to the node that caused it
                $skip = false;

                $tag = $this->reader->name;

                if ( $badTitle ) {
                        // The title is invalid, bail out of this page
                        $skip = true;
                } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
                                        &$pageInfo ) ) ) {
                        // Do nothing
                } elseif ( in_array( $tag, $normalFields ) ) {
                        $pageInfo[$tag] = $this->nodeContents();
                        if ( $tag == 'title' ) {
                                // Either array( $title, $origTitle ) or false
                                $title = $this->processTitle( $pageInfo['title'] );

                                if ( is_array( $title ) ) {
                                        $this->pageCallback( $title );
                                        list( $pageInfo['_title'], $origTitle ) = $title;
                                } else {
                                        $badTitle = true;
                                        $skip = true;
                                }
                        }
                } elseif ( $tag == 'revision' ) {
                        $this->handleRevision( $pageInfo );
                } elseif ( $tag == 'upload' ) {
                        $this->handleUpload( $pageInfo );
                } elseif ( $tag != '#text' ) {
                        $this->warn( "Unhandled page XML tag $tag" );
                        $skip = true;
                }
        }

        // '_title' is only present when a valid <title> was processed
        if ( isset( $pageInfo['_title'] ) ) {
                $this->pageOutCallback( $pageInfo['_title'], $origTitle,
                                        $pageInfo['revisionCount'],
                                        $pageInfo['successfulRevisionCount'],
                                        $pageInfo );
        }
}
00607 
/**
 * Reads a <revision> element and passes the collected data to
 * processRevision(), updating the revision counters in $pageInfo.
 *
 * Every child node is first offered to the 'ImportHandleRevisionXMLTag'
 * hook. Unknown tags are logged and skipped together with their subtree.
 *
 * @param array $pageInfo Page data; 'revisionCount' is always
 *   incremented, 'successfulRevisionCount' only when processRevision()
 *   returns a truthy value
 */
private function handleRevision( &$pageInfo ) {
        $this->debug( "Enter revision handler" );
        $revisionInfo = array();

        $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );

        $skip = false;

        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
                if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
                                $this->reader->name == 'revision' ) {
                        break;
                }

                // A skip request only applies to the node that caused it
                $skip = false;

                $tag = $this->reader->name;

                // Hook arguments have to be passed as an array
                if ( !wfRunHooks( 'ImportHandleRevisionXMLTag',
                                        array( $this, $pageInfo, $revisionInfo ) ) ) {
                        // Do nothing
                } elseif ( in_array( $tag, $normalFields ) ) {
                        $revisionInfo[$tag] = $this->nodeContents();
                } elseif ( $tag == 'contributor' ) {
                        $revisionInfo['contributor'] = $this->handleContributor();
                } elseif ( $tag != '#text' ) {
                        $this->warn( "Unhandled revision XML tag $tag" );
                        $skip = true;
                }
        }

        $pageInfo['revisionCount']++;
        if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
                $pageInfo['successfulRevisionCount']++;
        }
}
00645 
/**
 * Builds a WikiRevision from parsed revision data and passes it to the
 * revision callback.
 *
 * @param array $pageInfo Page data; '_title' is read unconditionally
 * @param array $revisionInfo Data collected by handleRevision()
 * @return mixed Result of revisionCallback()
 */
private function processRevision( $pageInfo, $revisionInfo ) {
        $revision = new WikiRevision;

        // Optional fields copied straight onto the revision, in this order
        $optionalSetters = array(
                'id' => 'setID',
                'text' => 'setText',
                'model' => 'setModel',
                'format' => 'setFormat',
        );
        foreach ( $optionalSetters as $field => $setter ) {
                if ( isset( $revisionInfo[$field] ) ) {
                        $revision->$setter( $revisionInfo[$field] );
                }
        }

        $revision->setTitle( $pageInfo['_title'] );

        // Fall back to the current time when the dump has no timestamp
        if ( !isset( $revisionInfo['timestamp'] ) ) {
                $revision->setTimestamp( wfTimestampNow() );
        } else {
                $revision->setTimestamp( $revisionInfo['timestamp'] );
        }

        if ( isset( $revisionInfo['comment'] ) ) {
                $revision->setComment( $revisionInfo['comment'] );
        }

        // The mere presence of the field marks the edit as minor
        if ( isset( $revisionInfo['minor'] ) ) {
                $revision->setMinor( true );
        }

        $contributor = isset( $revisionInfo['contributor'] )
                ? $revisionInfo['contributor']
                : array();
        if ( isset( $contributor['ip'] ) ) {
                $revision->setUserIP( $contributor['ip'] );
        }
        if ( isset( $contributor['username'] ) ) {
                $revision->setUserName( $contributor['username'] );
        }

        $revision->setNoUpdates( $this->mNoUpdates );

        return $this->revisionCallback( $revision );
}
00691 
/**
 * Reads an <upload> element and, when upload importing is enabled,
 * passes the collected data to processUpload().
 *
 * Base64-encoded <contents> are written to a temporary file. If an
 * image base path is set and the file named by 'rel' exists below it,
 * that file takes precedence as the source.
 *
 * @param array $pageInfo Page data collected so far
 * @return mixed Result of processUpload(), or null when uploads are
 *   not being imported
 */
private function handleUpload( &$pageInfo ) {
        $this->debug( "Enter upload handler" );
        $uploadInfo = array();

        $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
                                'src', 'size', 'sha1base36', 'archivename', 'rel' );

        $skip = false;

        while ( $skip ? $this->reader->next() : $this->reader->read() ) {
                if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
                                $this->reader->name == 'upload' ) {
                        break;
                }

                // A skip request only applies to the node that caused it
                $skip = false;

                $tag = $this->reader->name;

                // Hook arguments have to be passed as an array
                if ( !wfRunHooks( 'ImportHandleUploadXMLTag',
                                        array( $this, $pageInfo ) ) ) {
                        // Do nothing
                } elseif ( in_array( $tag, $normalFields ) ) {
                        $uploadInfo[$tag] = $this->nodeContents();
                } elseif ( $tag == 'contributor' ) {
                        $uploadInfo['contributor'] = $this->handleContributor();
                } elseif ( $tag == 'contents' ) {
                        // Read the attribute while the reader is still on the
                        // opening tag; nodeContents() moves it to the end tag.
                        $encoding = $this->reader->getAttribute( 'encoding' );
                        $contents = $this->nodeContents();
                        if ( $encoding === 'base64' ) {
                                $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
                                $uploadInfo['isTempSrc'] = true;
                        }
                } elseif ( $tag != '#text' ) {
                        $this->warn( "Unhandled upload XML tag $tag" );
                        $skip = true;
                }
        }

        if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
                $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
                if ( file_exists( $path ) ) {
                        $uploadInfo['fileSrc'] = $path;
                        $uploadInfo['isTempSrc'] = false;
                }
        }

        if ( $this->mImportUploads ) {
                return $this->processUpload( $pageInfo, $uploadInfo );
        }
}
00745 
/**
 * Writes the given data to a new temporary file.
 *
 * @param string $contents Data to store
 * @return string Name of the temporary file as returned by tempnam()
 */
private function dumpTemp( $contents ) {
        $tmpPath = tempnam( wfTempDir(), 'importupload' );
        file_put_contents( $tmpPath, $contents );

        return $tmpPath;
}
00755 
/**
 * Builds a WikiRevision from parsed upload data and passes it to the
 * upload callback.
 *
 * @param array $pageInfo Page data; '_title' and 'id' are read
 *   unconditionally
 * @param array $uploadInfo Data collected by handleUpload(); 'timestamp',
 *   'filename', 'src', 'size' and 'comment' are read unconditionally
 * @return mixed Result of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
        $upload = new WikiRevision;

        // A missing description text is treated as empty
        if ( isset( $uploadInfo['text'] ) ) {
                $text = $uploadInfo['text'];
        } else {
                $text = '';
        }

        $upload->setTitle( $pageInfo['_title'] );
        $upload->setID( $pageInfo['id'] );
        $upload->setTimestamp( $uploadInfo['timestamp'] );
        $upload->setText( $text );
        $upload->setFilename( $uploadInfo['filename'] );
        if ( isset( $uploadInfo['archivename'] ) ) {
                $upload->setArchiveName( $uploadInfo['archivename'] );
        }
        $upload->setSrc( $uploadInfo['src'] );
        if ( isset( $uploadInfo['fileSrc'] ) ) {
                $isTemp = !empty( $uploadInfo['isTempSrc'] );
                $upload->setFileSrc( $uploadInfo['fileSrc'], $isTemp );
        }
        if ( isset( $uploadInfo['sha1base36'] ) ) {
                $upload->setSha1Base36( $uploadInfo['sha1base36'] );
        }
        $upload->setSize( intval( $uploadInfo['size'] ) );
        $upload->setComment( $uploadInfo['comment'] );

        $contributor = isset( $uploadInfo['contributor'] )
                ? $uploadInfo['contributor']
                : array();
        if ( isset( $contributor['ip'] ) ) {
                $upload->setUserIP( $contributor['ip'] );
        }
        if ( isset( $contributor['username'] ) ) {
                $upload->setUserName( $contributor['username'] );
        }
        $upload->setNoUpdates( $this->mNoUpdates );

        return call_user_func( $this->mUploadCallback, $upload );
}
00794 
/**
 * Reads a <contributor> element.
 *
 * @return array Contents of the 'id', 'ip' and 'username' children that
 *   were present, keyed by tag name; other children are ignored
 */
private function handleContributor() {
        $wanted = array( 'id', 'ip', 'username' );
        $info = array();

        while ( $this->reader->read() ) {
                $atClosingTag = $this->reader->nodeType == XmlReader::END_ELEMENT
                        && $this->reader->name == 'contributor';
                if ( $atClosingTag ) {
                        break;
                }

                $tag = $this->reader->name;
                if ( !in_array( $tag, $wanted ) ) {
                        continue;
                }

                $info[$tag] = $this->nodeContents();
        }

        return $info;
}
00817 
/**
 * Turns the title text from the dump into the title to import to.
 *
 * A target namespace, when set and the original title is valid, takes
 * precedence; otherwise the target root page (if any) is prepended.
 * Titles that are invalid, interwiki, unable to exist, or that the
 * current user may not edit/create (outside command line mode) are
 * reported through notice() and rejected.
 *
 * @param string $text Title text as found in the dump
 * @return array|bool array( $title, $origTitle ) on success, false otherwise
 */
private function processTitle( $text ) {
        global $wgCommandLineMode;

        $workTitle = $text;
        $origTitle = Title::newFromText( $workTitle );

        $useTargetNamespace = !is_null( $this->mTargetNamespace ) && !is_null( $origTitle );
        if ( $useTargetNamespace ) {
                # makeTitleSafe is used because $origTitle may carry an interwiki
                # prefix (the interwiki map can differ between wikis), in which
                # case the DB key can start with a lowercase character
                $title = Title::makeTitleSafe( $this->mTargetNamespace,
                        $origTitle->getDBkey() );
        } else {
                if ( !is_null( $this->mTargetRootPage ) ) {
                        $workTitle = $this->mTargetRootPage . '/' . $workTitle;
                }
                $title = Title::newFromText( $workTitle );
        }

        if ( is_null( $title ) ) {
                # Invalid page title? Ignore the page
                $this->notice( 'import-error-invalid', $workTitle );
                return false;
        }

        // Find the first reason, if any, to refuse this title
        $errorKey = null;
        if ( $title->isExternal() ) {
                $errorKey = 'import-error-interwiki';
        } elseif ( !$title->canExist() ) {
                $errorKey = 'import-error-special';
        } elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
                # Do not import if the importing wiki user cannot edit this page
                $errorKey = 'import-error-edit';
        } elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
                # Do not import if the importing wiki user cannot create this page
                $errorKey = 'import-error-create';
        }

        if ( $errorKey !== null ) {
                $this->notice( $errorKey, $title->getPrefixedText() );
                return false;
        }

        return array( $title, $origTitle );
}
00862 }
00863 
/**
 * Stream wrapper that exposes a registered import source object (anything
 * providing atEnd() and readChunk()) as a readable PHP stream, so that it
 * can be opened through a "scheme://<id>" URL.
 */
class UploadSourceAdapter {
    /** @var array Registered source objects, keyed by their generated ID */
    static $sourceRegistrations = array();

    /** @var object Source object this stream instance reads from */
    private $mSource;
    /** @var string Data already pulled from the source but not yet returned */
    private $mBuffer = '';
    /** @var int Number of bytes handed out by stream_read() so far */
    private $mPosition = 0;

    /**
     * Register a source object and return the ID it can be opened with.
     *
     * @param object $source Object providing atEnd() and readChunk()
     * @return string ID to use as the host part of the stream URL
     */
    static function registerSource( $source ) {
        $id = wfRandomString();

        self::$sourceRegistrations[$id] = $source;

        return $id;
    }

    /**
     * Open the stream for a previously registered source.
     *
     * @param string $path Stream URL; its host component is the source ID
     * @param string $mode Unused
     * @param int $options Unused
     * @param string &$opened_path Unused
     * @return bool False if the URL is malformed or the ID is unknown
     */
    function stream_open( $path, $mode, $options, &$opened_path ) {
        $url = parse_url( $path );
        // parse_url() yields false for seriously malformed URLs and omits
        // 'host' when there is none; neither can name a registered source.
        if ( !is_array( $url ) || !isset( $url['host'] ) ) {
            return false;
        }
        $id = $url['host'];

        if ( !isset( self::$sourceRegistrations[$id] ) ) {
            return false;
        }

        $this->mSource = self::$sourceRegistrations[$id];
        // Start from a clean state so stream_tell() is 0 before any read.
        $this->mBuffer = '';
        $this->mPosition = 0;

        return true;
    }

    /**
     * Read up to $count bytes, pulling chunks from the source as needed.
     *
     * @param int $count Maximum number of bytes to return
     * @return string Data read; empty string when nothing is available
     */
    function stream_read( $count ) {
        // Fill the buffer until it can satisfy the request, the source is
        // exhausted, or the source returns an empty/false chunk.
        while ( !$this->mSource->atEnd() && strlen( $this->mBuffer ) < $count ) {
            $read = (string)$this->mSource->readChunk();

            if ( $read === '' ) {
                break;
            }

            $this->mBuffer .= $read;
        }

        $return = '';
        if ( strlen( $this->mBuffer ) ) {
            // substr() may return false on older PHP versions when the offset
            // is at or past the end; normalise to a string in both cases.
            $return = (string)substr( $this->mBuffer, 0, $count );
            $this->mBuffer = (string)substr( $this->mBuffer, $count );
        }

        $this->mPosition += strlen( $return );

        return $return;
    }

    /**
     * Writing is not supported.
     *
     * @param string $data Ignored
     * @return bool Always false
     */
    function stream_write( $data ) {
        return false;
    }

    /**
     * @return int Number of bytes returned by stream_read() so far
     */
    function stream_tell() {
        return $this->mPosition;
    }

    /**
     * @return bool True once the source is exhausted and every buffered
     *  byte has been handed out
     */
    function stream_eof() {
        // The source may reach its end while data is still waiting in the
        // buffer; that data must be delivered before signalling EOF.
        return $this->mSource->atEnd() && strlen( $this->mBuffer ) === 0;
    }

    /**
     * Return a stat array with every field set to zero.
     *
     * @return array Zero-filled array with both numeric and named keys
     */
    function url_stat() {
        $fields = array(
            'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
            'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks',
        );

        $result = array();
        foreach ( $fields as $index => $name ) {
            $result[$index] = 0;
            $result[$name] = 0;
        }

        return $result;
    }
}
00978 
/**
 * XMLReader extension adding a helper to collect an element's text.
 */
class XMLReader2 extends XMLReader {

    /**
     * Collect the text of the current element.
     *
     * Reads forward, concatenating text and significant-whitespace nodes,
     * and stops at the first end-element node encountered.
     *
     * @return string Collected text; empty string for an empty element or
     *  when the input ends before an end-element node is seen
     */
    function nodeContents() {
        if ( $this->isEmptyElement ) {
            return "";
        }

        $buffer = "";
        while ( $this->read() ) {
            switch ( $this->nodeType ) {
                case XMLReader::TEXT:
                case XMLReader::SIGNIFICANT_WHITESPACE:
                    $buffer .= $this->value;
                    break;
                case XMLReader::END_ELEMENT:
                    return $buffer;
            }
        }

        // Ran out of input without reaching an end tag: release the reader
        // and still hand back a string rather than close()'s boolean result.
        $this->close();
        return '';
    }
}
01002 
/**
 * Holder for a single revision, log entry or file upload read from an
 * import source, together with the code that writes it into the wiki.
 */
class WikiRevision {
    public $importer = null;

    /** @var object|null Title object the item belongs to */
    public $title = null;
    public $id = 0;
    /** @var string Timestamp, stored via wfTimestamp( TS_MW, ... ) by setTimestamp() */
    public $timestamp = "20010115000000";
    public $user = 0;
    /** @var string User name or IP address of the contributor */
    public $user_text = "";
    /** @var string|null Content model; resolved lazily by getModel() */
    public $model = null;
    /** @var string|null Content format; resolved lazily by getFormat() */
    public $format = null;
    public $text = "";
    /** @var object|null Content object; built lazily by getContent() */
    public $content = null;
    public $comment = "";
    public $minor = false;
    public $type = "";
    public $action = "";
    public $params = "";
    public $fileSrc = '';
    public $sha1base36 = false;
    public $isTemp = false;
    public $archiveName = '';
    /** @var bool|null Temp-file flag stored by setFileSrc() */
    public $fileIsTemp;
    /** @var string|null Remote source location set by setSrc() */
    public $src;
    /** @var string|null File name set by setFilename() */
    public $filename;
    /** @var int|null Size set by setSize() */
    public $size;
    private $mNoUpdates = false;

    /**
     * @param object $title Title object (only checked to be an object)
     * @throws MWException If $title is null or not an object
     */
    function setTitle( $title ) {
        if ( is_object( $title ) ) {
            $this->title = $title;
        } elseif ( is_null( $title ) ) {
            throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
        } else {
            throw new MWException( "WikiRevision given non-object title in import." );
        }
    }

    /**
     * @param int $id
     */
    function setID( $id ) {
        $this->id = $id;
    }

    /**
     * @param string $ts Timestamp, e.g. 2003-08-05T18:30:02Z; converted
     *  with wfTimestamp( TS_MW, ... ) before being stored
     */
    function setTimestamp( $ts ) {
        $this->timestamp = wfTimestamp( TS_MW, $ts );
    }

    /**
     * @param string $user User name, stored as the user text
     */
    function setUsername( $user ) {
        $this->user_text = $user;
    }

    /**
     * @param string $ip IP address, stored as the user text
     */
    function setUserIP( $ip ) {
        $this->user_text = $ip;
    }

    /**
     * @param string $model Content model
     */
    function setModel( $model ) {
        $this->model = $model;
    }

    /**
     * @param string $format Content format
     */
    function setFormat( $format ) {
        $this->format = $format;
    }

    /**
     * @param string $text Revision text
     */
    function setText( $text ) {
        $this->text = $text;
    }

    /**
     * @param string $text Comment / edit summary
     */
    function setComment( $text ) {
        $this->comment = $text;
    }

    /**
     * @param mixed $minor Cast to bool before being stored
     */
    function setMinor( $minor ) {
        $this->minor = (bool)$minor;
    }

    /**
     * @param string $src Location handed to Http::get() by downloadSource()
     */
    function setSrc( $src ) {
        $this->src = $src;
    }

    /**
     * @param string $src Local path of the file to upload
     * @param bool $isTemp Whether that file is temporary and may be deleted
     */
    function setFileSrc( $src, $isTemp ) {
        $this->fileSrc = $src;
        $this->fileIsTemp = $isTemp;
    }

    /**
     * @param string $sha1base36 SHA-1 hash in base 36
     */
    function setSha1Base36( $sha1base36 ) {
        $this->sha1base36 = $sha1base36;
    }

    /**
     * @param string $filename
     */
    function setFilename( $filename ) {
        $this->filename = $filename;
    }

    /**
     * @param string $archiveName
     */
    function setArchiveName( $archiveName ) {
        $this->archiveName = $archiveName;
    }

    /**
     * @param mixed $size Converted with intval() before being stored
     */
    function setSize( $size ) {
        $this->size = intval( $size );
    }

    /**
     * @param string $type Log type
     */
    function setType( $type ) {
        $this->type = $type;
    }

    /**
     * @param string $action Log action
     */
    function setAction( $action ) {
        $this->action = $action;
    }

    /**
     * @param string $params Log parameters
     */
    function setParams( $params ) {
        $this->params = $params;
    }

    /**
     * @param bool $noupdates When true, importOldRevision() skips
     *  WikiPage::doEditUpdates()
     */
    public function setNoUpdates( $noupdates ) {
        $this->mNoUpdates = $noupdates;
    }

    /**
     * @return object|null Title object, or null if none was set
     */
    function getTitle() {
        return $this->title;
    }

    /**
     * @return int
     */
    function getID() {
        return $this->id;
    }

    /**
     * @return string
     */
    function getTimestamp() {
        return $this->timestamp;
    }

    /**
     * @return string The user text (name or IP), not a user object
     */
    function getUser() {
        return $this->user_text;
    }

    /**
     * @deprecated Flagged via ContentHandler::deprecated() as of 1.21
     * @return string Raw revision text
     */
    function getText() {
        ContentHandler::deprecated( __METHOD__, '1.21' );

        return $this->text;
    }

    /**
     * Build (once) and return the content object for this revision's text.
     *
     * @return object Result of ContentHandler::makeContent()
     */
    function getContent() {
        if ( is_null( $this->content ) ) {
            $this->content =
                ContentHandler::makeContent(
                    $this->text,
                    $this->getTitle(),
                    $this->getModel(),
                    $this->getFormat()
                );
        }

        return $this->content;
    }

    /**
     * @return string Content model; defaults to the title's content model
     */
    function getModel() {
        if ( is_null( $this->model ) ) {
            $this->model = $this->getTitle()->getContentModel();
        }

        return $this->model;
    }

    /**
     * @return string Content format; defaults to the default format of the
     *  handler for this revision's content model
     */
    function getFormat() {
        // Test the format itself, not the model: an explicitly set format
        // must be kept, and a missing one must be resolved even when the
        // model is already known. The default is taken from the handler of
        // the revision's own model so that model and format always agree.
        if ( is_null( $this->format ) ) {
            $this->format = ContentHandler::getForModelID( $this->getModel() )->getDefaultFormat();
        }

        return $this->format;
    }

    /**
     * @return string
     */
    function getComment() {
        return $this->comment;
    }

    /**
     * @return bool
     */
    function getMinor() {
        return $this->minor;
    }

    /**
     * @return string|null Value given to setSrc(), null if never set
     */
    function getSrc() {
        return $this->src;
    }

    /**
     * @return string|bool SHA-1 as 40 hexadecimal digits, or false if no
     *  hash was set
     */
    function getSha1() {
        if ( $this->sha1base36 ) {
            // Pad to the full 40 digits: without it a hash with leading
            // zeros comes out shorter and can never equal sha1_file() output.
            return wfBaseConvert( $this->sha1base36, 36, 16, 40 );
        }
        return false;
    }

    /**
     * @return string Local path given to setFileSrc()
     */
    function getFileSrc() {
        return $this->fileSrc;
    }

    /**
     * @return bool Whether the local file source is a temporary file
     */
    function isTempSrc() {
        // setFileSrc() records the flag in $fileIsTemp; $isTemp is still
        // honoured for code that sets that public property directly.
        return (bool)( $this->fileIsTemp || $this->isTemp );
    }

    /**
     * @return string|null Value given to setFilename(), null if never set
     */
    function getFilename() {
        return $this->filename;
    }

    /**
     * @return string
     */
    function getArchiveName() {
        return $this->archiveName;
    }

    /**
     * @return int|null Value given to setSize(), null if never set
     */
    function getSize() {
        return $this->size;
    }

    /**
     * @return string
     */
    function getType() {
        return $this->type;
    }

    /**
     * @return string
     */
    function getAction() {
        return $this->action;
    }

    /**
     * @return string
     */
    function getParams() {
        return $this->params;
    }

    /**
     * Insert this revision into the database, creating the page if needed.
     *
     * @return bool False if a revision with the same page, timestamp, user
     *  text and comment already exists; true otherwise
     */
    function importOldRevision() {
        $dbw = wfGetDB( DB_MASTER );

        # Sneak a single revision into place
        $user = User::newFromName( $this->getUser() );
        if ( $user ) {
            $userId = intval( $user->getId() );
            $userText = $user->getName();
            $userObj = $user;
        } else {
            $userId = 0;
            $userText = $this->getUser();
            $userObj = new User;
        }

        // avoid memory leak...?
        $linkCache = LinkCache::singleton();
        $linkCache->clear();

        $page = WikiPage::factory( $this->title );
        if ( !$page->exists() ) {
            # must create the page...
            $pageId = $page->insertOn( $dbw );
            $created = true;
            $oldcountable = null;
        } else {
            $pageId = $page->getId();
            $created = false;

            $prior = $dbw->selectField( 'revision', '1',
                array( 'rev_page' => $pageId,
                    'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
                    'rev_user_text' => $userText,
                    'rev_comment'   => $this->getComment() ),
                __METHOD__
            );
            if ( $prior ) {
                // @todo FIXME: This could fail slightly for multiple matches :P
                wfDebug( __METHOD__ . ": skipping existing revision for [[" .
                    $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
                return false;
            }
            $oldcountable = $page->isCountable();
        }

        # @todo FIXME: Use original rev_id optionally (better for backups)
        # Insert the row
        $revision = new Revision( array(
            'title'      => $this->title,
            'page'       => $pageId,
            'content_model'  => $this->getModel(),
            'content_format' => $this->getFormat(),
            'text'       => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()?
            'comment'    => $this->getComment(),
            'user'       => $userId,
            'user_text'  => $userText,
            'timestamp'  => $this->timestamp,
            'minor_edit' => $this->minor,
            ) );
        $revision->insertOn( $dbw );
        $changed = $page->updateIfNewerOn( $dbw, $revision );

        if ( $changed !== false && !$this->mNoUpdates ) {
            wfDebug( __METHOD__ . ": running updates\n" );
            $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
        }

        return true;
    }

    /**
     * Insert this item into the logging table unless a matching row
     * already exists. Nothing is returned in either case.
     */
    function importLogItem() {
        $dbw = wfGetDB( DB_MASTER );
        # @todo FIXME: This will not record autoblocks
        if ( !$this->getTitle() ) {
            wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
                $this->timestamp . "\n" );
            return;
        }
        # Check if it exists already
        // @todo FIXME: Use original log ID (better for backups)
        $prior = $dbw->selectField( 'logging', '1',
            array( 'log_type' => $this->getType(),
                'log_action'    => $this->getAction(),
                'log_timestamp' => $dbw->timestamp( $this->timestamp ),
                'log_namespace' => $this->getTitle()->getNamespace(),
                'log_title'     => $this->getTitle()->getDBkey(),
                'log_comment'   => $this->getComment(),
                #'log_user_text' => $this->user_text,
                'log_params'    => $this->params ),
            __METHOD__
        );
        // @todo FIXME: This could fail slightly for multiple matches :P
        if ( $prior ) {
            wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
                $this->timestamp . "\n" );
            return;
        }
        $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
        $data = array(
            'log_id' => $log_id,
            'log_type' => $this->type,
            'log_action' => $this->action,
            'log_timestamp' => $dbw->timestamp( $this->timestamp ),
            'log_user' => User::idFromName( $this->user_text ),
            #'log_user_text' => $this->user_text,
            'log_namespace' => $this->getTitle()->getNamespace(),
            'log_title' => $this->getTitle()->getDBkey(),
            'log_comment' => $this->getComment(),
            'log_params' => $this->params
        );
        $dbw->insert( 'logging', $data, __METHOD__ );
    }

    /**
     * Store this item as a file upload, either as the current version or
     * as an archived version.
     *
     * @return bool True if the upload status is good, false otherwise
     */
    function importUpload() {
        # Construct a file
        $archiveName = $this->getArchiveName();
        if ( $archiveName ) {
            wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
            $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                RepoGroup::singleton()->getLocalRepo(), $archiveName );
        } else {
            $file = wfLocalFile( $this->getTitle() );
            wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
            if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
                # The existing file is newer, so this one goes in as an old version
                $archiveName = $file->getTimestamp() . '!' . $file->getName();
                $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
                    RepoGroup::singleton()->getLocalRepo(), $archiveName );
                wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
            }
        }
        if ( !$file ) {
            wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
            return false;
        }

        # Get the file source or download if necessary
        $source = $this->getFileSrc();
        $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
        if ( !$source ) {
            $source = $this->downloadSource();
            # A downloaded copy is always ours to delete
            $flags |= File::DELETE_SOURCE;
        }
        if ( !$source ) {
            wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
            return false;
        }
        $sha1 = $this->getSha1();
        if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
            if ( $flags & File::DELETE_SOURCE ) {
                # Broken file; delete it if it is a temporary file
                unlink( $source );
            }
            wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
            return false;
        }

        $user = User::newFromName( $this->user_text );

        # Do the actual upload
        if ( $archiveName ) {
            $status = $file->uploadOld( $source, $archiveName,
                $this->getTimestamp(), $this->getComment(), $user, $flags );
        } else {
            $status = $file->upload( $source, $this->getComment(), $this->getComment(),
                $flags, false, $this->getTimestamp(), $user );
        }

        if ( $status->isGood() ) {
            wfDebug( __METHOD__ . ": Succesful\n" );
            return true;
        } else {
            wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
            return false;
        }
    }

    /**
     * Fetch the location returned by getSrc() into a new temporary file.
     *
     * @return string|bool Path of the temporary file, or false if uploads
     *  are disabled or the file could not be created, fetched or written
     */
    function downloadSource() {
        global $wgEnableUploads;
        if ( !$wgEnableUploads ) {
            return false;
        }

        $tempo = tempnam( wfTempDir(), 'download' );
        $f = ( $tempo === false ) ? false : fopen( $tempo, 'wb' );
        if ( !$f ) {
            wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
            // tempnam() has already created the file; do not leave it behind.
            if ( $tempo !== false && file_exists( $tempo ) ) {
                unlink( $tempo );
            }
            return false;
        }

        // @todo FIXME!
        $src = $this->getSrc();
        $data = Http::get( $src );
        if ( !$data ) {
            wfDebug( "IMPORT: couldn't fetch source $src\n" );
            fclose( $f );
            unlink( $tempo );
            return false;
        }

        $written = fwrite( $f, $data );
        fclose( $f );
        if ( $written !== strlen( $data ) ) {
            // A failed or short write would leave a truncated file that
            // must not be handed on as a valid download.
            wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
            unlink( $tempo );
            return false;
        }

        return $tempo;
    }

}
01567 
/**
 * Import source that serves an in-memory string as a single chunk.
 */
class ImportStringSource {
    /** @var string Complete data to serve */
    public $mString;
    /** @var bool Whether the string has already been handed out */
    public $mRead = false;

    /**
     * @param string $string Complete data to serve
     */
    function __construct( $string ) {
        $this->mString = $string;
        $this->mRead = false;
    }

    /**
     * @return bool True once readChunk() has returned the string
     */
    function atEnd() {
        return $this->mRead;
    }

    /**
     * @return bool|string The whole string on the first call, false afterwards
     */
    function readChunk() {
        if ( $this->atEnd() ) {
            return false;
        }
        $this->mRead = true;
        return $this->mString;
    }
}
01596 
/**
 * Import source that reads from an open stream handle, plus factory
 * methods that obtain such a handle from a file, an upload or a URL.
 */
class ImportStreamSource {
    /** @var resource Stream handle to read from */
    public $mHandle;

    /**
     * @param resource $handle Open, readable stream handle
     */
    function __construct( $handle ) {
        $this->mHandle = $handle;
    }

    /**
     * @return bool Result of feof() on the handle
     */
    function atEnd() {
        return feof( $this->mHandle );
    }

    /**
     * @return string|bool Up to 32768 bytes from the handle, as returned by fread()
     */
    function readChunk() {
        return fread( $this->mHandle, 32768 );
    }

    /**
     * @param string $filename Path of the file to open for reading
     * @return Status Good status wrapping an ImportStreamSource, or a
     *  fatal 'importcantopen' status if the file cannot be opened
     */
    static function newFromFile( $filename ) {
        wfSuppressWarnings();
        $file = fopen( $filename, 'rt' );
        wfRestoreWarnings();
        if ( !$file ) {
            return Status::newFatal( "importcantopen" );
        }
        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * @param string $fieldname Name of the file field in $_FILES
     * @return Status Good status wrapping an ImportStreamSource, or a
     *  fatal status describing why the upload cannot be used
     */
    static function newFromUpload( $fieldname = "xmlimport" ) {
        // Read $_FILES without taking a reference, which would create the
        // key as a side effect when it is missing.
        if ( !isset( $_FILES[$fieldname] ) || empty( $_FILES[$fieldname]['name'] ) ) {
            return Status::newFatal( 'importnofile' );
        }
        $upload = $_FILES[$fieldname];

        if ( !empty( $upload['error'] ) ) {
            switch ( $upload['error'] ) {
                case UPLOAD_ERR_INI_SIZE: # Exceeds the upload_max_filesize directive in php.ini
                case UPLOAD_ERR_FORM_SIZE: # Exceeds the MAX_FILE_SIZE directive of the HTML form
                    return Status::newFatal( 'importuploaderrorsize' );
                case UPLOAD_ERR_PARTIAL: # The uploaded file was only partially uploaded
                    return Status::newFatal( 'importuploaderrorpartial' );
                case UPLOAD_ERR_NO_TMP_DIR: # Missing a temporary folder
                    return Status::newFatal( 'importuploaderrortemp' );
                # Other codes fall through to the is_uploaded_file() check below
            }
        }

        $fname = $upload['tmp_name'];
        if ( is_uploaded_file( $fname ) ) {
            return ImportStreamSource::newFromFile( $fname );
        } else {
            return Status::newFatal( 'importnofile' );
        }
    }

    /**
     * @param string $url Location to fetch
     * @param string $method HTTP method passed to Http::request()
     * @return Status Good status wrapping an ImportStreamSource, or a
     *  fatal 'importcantopen' status if fetching or buffering fails
     */
    static function newFromURL( $url, $method = 'GET' ) {
        wfDebug( __METHOD__ . ": opening $url\n" );
        # Use the standard HTTP fetch function; it times out
        # quicker and sorts out user-agent problems which might
        # otherwise prevent importing from large sites, such
        # as the Wikimedia cluster, etc.
        $data = Http::request( $method, $url, array( 'followRedirects' => true ) );
        if ( $data === false ) {
            return Status::newFatal( 'importcantopen' );
        }

        $file = tmpfile();
        if ( !$file ) {
            // No temporary file available to buffer the response in.
            return Status::newFatal( 'importcantopen' );
        }
        fwrite( $file, $data );
        fflush( $file );
        fseek( $file, 0 );
        return Status::newGood( new ImportStreamSource( $file ) );
    }

    /**
     * Build a Special:Export URL on another wiki and open it as a source.
     *
     * @param string $interwiki Interwiki prefix of the source wiki
     * @param string $page Name of the page to export
     * @param bool $history Adds history=1 to the request when true
     * @param bool $templates Adds templates=1 to the request when true
     * @param int $pageLinkDepth Sent as pagelink-depth when non-zero
     * @return Status Result of newFromURL(), or a fatal status for an
     *  empty page name or an unusable interwiki prefix
     */
    public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
        if ( $page == '' ) {
            return Status::newFatal( 'import-noarticle' );
        }
        $link = Title::newFromText( "$interwiki:Special:Export/$page" );
        if ( is_null( $link ) || $link->getInterwiki() == '' ) {
            return Status::newFatal( 'importbadinterwiki' );
        }

        $params = array();
        if ( $history ) {
            $params['history'] = 1;
        }
        if ( $templates ) {
            $params['templates'] = 1;
        }
        if ( $pageLinkDepth ) {
            $params['pagelink-depth'] = $pageLinkDepth;
        }
        $url = $link->getFullUrl( $params );
        # For interwikis, use POST to avoid redirects.
        return ImportStreamSource::newFromURL( $url, "POST" );
    }
}