MediaWiki
master
|
<?php
/**
 * XML file reader for the page data importer.
 *
 * Walks an XML dump with XMLReader and hands pages, revisions, uploads and
 * log items to configurable callbacks.
 */
class WikiImporter {
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
	private $mNoticeCallback, $mDebug;
	private $mImportUploads, $mImageBasePath;
	private $mNoUpdates = false;

	/**
	 * Creates an importer that reads its XML from the given source.
	 *
	 * @param $source Object handed to UploadSourceAdapter::registerSource();
	 *   it is read back through the "uploadsource://" stream wrapper.
	 */
	function __construct( $source ) {
		$this->reader = new XMLReader();

		// Registering a protocol twice raises a warning, so only register the
		// wrapper if no earlier importer instance has already done so.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		if ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
		} else {
			$this->reader->open( "uploadsource://$id" );
		}

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
		$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
	}

	/**
	 * Logs an XML error to the debug log. Despite the name, nothing is thrown.
	 *
	 * @param $err string Error description
	 */
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Writes a debug line, but only when debugging was enabled via setDebug().
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Writes a warning line to the debug log unconditionally.
	 */
	private function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Reports a message: through the notice callback when one is callable,
	 * otherwise by echoing the rendered message.
	 *
	 * @param $msg string Message key; any further arguments are message parameters
	 */
	private function notice( $msg /*, $param, ...*/ ) {
		$params = func_get_args();
		array_shift( $params );

		if ( is_callable( $this->mNoticeCallback ) ) {
			call_user_func( $this->mNoticeCallback, $msg, $params );
		} else { # No ImportReporter -> CLI
			echo wfMessage( $msg, $params )->text() . "\n";
		}
	}

	/**
	 * Enables or disables debug output.
	 * @param $debug bool
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the flag that is copied onto every WikiRevision via setNoUpdates().
	 * @param $noupdates bool
	 */
	function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * Sets the callback used by notice().
	 *
	 * @param $callback callback
	 * @return mixed Whatever wfSetVar() returns (presumably the previous value)
	 */
	public function setNoticeCallback( $callback ) {
		return wfSetVar( $this->mNoticeCallback, $callback );
	}

	/**
	 * Sets the callback invoked when a page title has been read.
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked after a page has been fully processed.
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for each revision.
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for each upload.
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for each log item.
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the site info callback. Note that handleSiteInfo() throws when
	 * one is set.
	 * @param $callback callback
	 * @return callback The previously set callback
	 */
	public function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the namespace all imported pages are moved into.
	 *
	 * @param $namespace int|null Non-negative namespace index, or null to keep
	 *   the namespaces found in the dump
	 * @return bool True when the value was accepted, false for a negative index
	 */
	public function setTargetNamespace( $namespace ) {
		if ( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
			return true;
		} elseif ( $namespace >= 0 ) {
			// @todo FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
			return true;
		}
		return false;
	}

	/**
	 * Sets a page under which all imported pages are placed as subpages.
	 *
	 * @param $rootpage string|null Root page title, or null for none
	 * @return Status
	 */
	public function setTargetRootPage( $rootpage ) {
		$status = Status::newGood();
		if( is_null( $rootpage ) ) {
			// No rootpage
			$this->mTargetRootPage = null;
		} elseif( $rootpage !== '' ) {
			$rootpage =
rtrim( $rootpage, '/' ); // avoid double slashes
			$defaultNs = is_null( $this->mTargetNamespace ) ? NS_MAIN : $this->mTargetNamespace;
			$rootTitle = Title::newFromText( $rootpage, $defaultNs );
			if ( !$rootTitle || $rootTitle->isExternal() ) {
				$status->fatal( 'import-rootpage-invalid' );
			} elseif ( !MWNamespace::hasSubpages( $rootTitle->getNamespace() ) ) {
				global $wgContLang;

				$nsLabel = $rootTitle->getNamespace() == NS_MAIN
					? wfMessage( 'blanknamespace' )->text()
					: $wgContLang->getNsText( $rootTitle->getNamespace() );
				$status->fatal( 'import-rootpage-nosubpage', $nsLabel );
			} else {
				// Reset the target namespace to "all" so the namespace check
				// in processTitle() can pass.
				$this->setTargetNamespace( null );
				$this->mTargetRootPage = $rootTitle->getPrefixedDBKey();
			}
		}
		return $status;
	}

	/**
	 * Sets the directory that relative upload paths ("rel") are resolved against.
	 * @param $dir string
	 */
	public function setImageBasePath( $dir ) {
		$this->mImageBasePath = $dir;
	}

	/**
	 * Controls whether uploads found in the dump are processed.
	 * @param $import bool
	 */
	public function setImportUploads( $import ) {
		$this->mImportUploads = $import;
	}

	/**
	 * Default revision callback: runs the revision's importOldRevision()
	 * through the master database's deadlockLoop().
	 *
	 * @param $revision WikiRevision
	 * @return mixed Result of deadlockLoop()
	 */
	public function importRevision( $revision ) {
		$masterDb = wfGetDB( DB_MASTER );
		$job = array( $revision, 'importOldRevision' );
		return $masterDb->deadlockLoop( $job );
	}

	/**
	 * Default log item callback: runs the item's importLogItem() through the
	 * master database's deadlockLoop().
	 *
	 * @param $rev WikiRevision
	 * @return mixed Result of deadlockLoop()
	 */
	public function importLogItem( $rev ) {
		$masterDb = wfGetDB( DB_MASTER );
		$job = array( $rev, 'importLogItem' );
		return $masterDb->deadlockLoop( $job );
	}

	/**
	 * Default upload callback: runs the revision's importUpload() through the
	 * master database's deadlockLoop().
	 *
	 * @param $revision WikiRevision
	 * @return mixed Result of deadlockLoop()
	 */
	public function importUpload( $revision ) {
		$masterDb = wfGetDB( DB_MASTER );
		$job = array( $revision, 'importUpload' );
		return $masterDb->deadlockLoop( $job );
	}

	/**
	 * Default page-out callback: forwards all of its arguments to the
	 * 'AfterImportPage' hook.
	 *
	 * @param $title Title of the imported page
	 * @param $origTitle Title as found in the dump
	 * @param $revCount int Number of revisions seen
	 * @param $sRevCount int Number of revisions imported successfully
	 * @param $pageInfo array Collected page data
	 * @return mixed Result of wfRunHooks()
	 */
	public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
		$hookArgs = func_get_args();
		return wfRunHooks( 'AfterImportPage', $hookArgs );
	}

	/**
	 * Alternate revision callback that only dumps the revision to the debug log.
	 * @param $revision WikiRevision
	 */
	public function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " .
$revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notifies the page callback, if one is set, that a page title was read.
	 * @param $title mixed Value returned by processTitle()
	 */
	function pageCallback( $title ) {
		if( isset( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Notifies the page-out callback, if one is set, forwarding all arguments.
	 *
	 * @param $title Title of the imported page
	 * @param $origTitle Title as found in the dump
	 * @param $revCount int Number of revisions seen
	 * @param $sucCount int Number of revisions imported successfully
	 * @param $pageInfo array Collected page data
	 */
	private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
		if( isset( $this->mPageOutCallback ) ) {
			$args = func_get_args();
			call_user_func_array( $this->mPageOutCallback, $args );
		}
	}

	/**
	 * Passes a revision (and this importer) to the revision callback.
	 *
	 * @param $revision WikiRevision
	 * @return mixed The callback's result, or false when no callback is set
	 */
	private function revisionCallback( $revision ) {
		if ( isset( $this->mRevisionCallback ) ) {
			return call_user_func_array( $this->mRevisionCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Passes a log item (and this importer) to the log item callback.
	 *
	 * @param $revision WikiRevision
	 * @return mixed The callback's result, or false when no callback is set
	 */
	private function logItemCallback( $revision ) {
		if ( isset( $this->mLogItemCallback ) ) {
			return call_user_func_array( $this->mLogItemCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Collects the text of the element the reader is positioned on, reading
	 * forward until the first end-element node.
	 *
	 * Text, CDATA and significant-whitespace nodes are concatenated; CDATA is
	 * included so that content wrapped in <![CDATA[ ... ]]> is not lost.
	 *
	 * @return string The collected text; "" for an empty element or when the
	 *   input ends before an end tag (the reader is closed in that case)
	 */
	private function nodeContents() {
		if( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while( $this->reader->read() ) {
			switch( $this->reader->nodeType ) {
			case XmlReader::TEXT:
			case XmlReader::CDATA:
			case XmlReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->reader->value;
				break;
			case XmlReader::END_ELEMENT:
				return $buffer;
			}
		}

		$this->reader->close();
		return '';
	}

	# --------------

	/**
	 * Debugging aid: dumps the current node's type name, name and value.
	 */
	private function dumpElement() {
		static $lookup = null;
		if ( !$lookup ) {
			$xmlReaderConstants = array(
				"NONE",
				"ELEMENT",
				"ATTRIBUTE",
				"TEXT",
				"CDATA",
				"ENTITY_REF",
				"ENTITY",
				"PI",
				"COMMENT",
				"DOC",
				"DOC_TYPE",
				"DOC_FRAGMENT",
				"NOTATION",
				"WHITESPACE",
				"SIGNIFICANT_WHITESPACE",
				"END_ELEMENT",
				"END_ENTITY",
				"XML_DECLARATION",
			);
			$lookup = array();

			// Map each node-type constant value back to its name.
			foreach( $xmlReaderConstants as $name ) {
				$lookup[constant("XmlReader::$name")] = $name;
			}
		}

		// var_dump() prints directly and returns nothing, so emit the
		// trailing blank lines as a separate statement.
		var_dump(
			$lookup[$this->reader->nodeType],
			$this->reader->name,
			$this->reader->value
		);
		print( "\n\n" );
	}

	/**
	 * Primary entry point: reads the dump and dispatches each top-level tag.
	 *
	 * @throws MWException When the first node read is not named "mediawiki"
	 * @return bool Always true when the loop finishes
	 */
	public function doImport() {
		$this->reader->read();

		if ( $this->reader->name != 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got ".
				$this->reader->name );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		while ( $keepReading ) {
			$tag = $this->reader->name;
			$type = $this->reader->nodeType;

			// Hook arguments must be passed as an array, as the other
			// wfRunHooks() callers in this class do.
			if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() jumps over the unhandled element's subtree.
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}

		return true;
	}

	/**
	 * Handles the <siteinfo> tag.
	 *
	 * @throws MWException When a site info callback has been set
	 * @return bool
	 */
	private function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
if ( ! $this->mSiteInfoCallback ) {
			$this->reader->next();
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Reads one <logitem> element into an array and passes it to
	 * processLogItem().
	 */
	private function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			// Hook arguments must be passed as a single array; passing them
			// positionally hands wfRunHooks() an object where it expects the
			// argument list.
			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
					array( $this, $logInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Builds a WikiRevision from collected log item data and passes it to the
	 * log item callback.
	 *
	 * @param $logInfo array Data collected by handleLogItem()
	 * @return mixed Result of logItemCallback()
	 */
	private function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
		$revision->setNoUpdates( $this->mNoUpdates );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName(
$logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Reads one <page> element: collects its simple fields, dispatches
	 * revisions and uploads, and finally fires the page-out callback.
	 *
	 * A page whose title is rejected by processTitle() is skipped entirely:
	 * neither the page callback nor the page-out callback is invoked for it.
	 */
	private function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'page' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
					&$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$pageInfo[$tag] = $this->nodeContents();
				if ( $tag == 'title' ) {
					$title = $this->processTitle( $pageInfo['title'] );

					if ( !$title ) {
						// processTitle() returned false: there is no title
						// pair to destructure or to report to the callback.
						$badTitle = true;
						$skip = true;
					} else {
						$this->pageCallback( $title );
						list( $pageInfo['_title'], $origTitle ) = $title;
					}
				}
			} elseif ( $tag == 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag == 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		// '_title' is only set once a valid title has been processed, so a
		// page with a bad or missing title never reaches the callback.
		if ( isset( $pageInfo['_title'] ) ) {
			$this->pageOutCallback( $pageInfo['_title'], $origTitle,
				$pageInfo['revisionCount'],
				$pageInfo['successfulRevisionCount'],
				$pageInfo );
		}
	}

	/**
	 * Reads one <revision> element and passes the collected data to
	 * processRevision(), updating the revision counters in $pageInfo.
	 *
	 * @param $pageInfo array Page data, modified in place
	 */
	private function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp',
'comment', 'minor', 'model', 'format', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'revision' ) {
				break;
			}

			$tag = $this->reader->name;

			// Hook arguments must be passed as a single array, matching the
			// ImportHandlePageXMLTag call in handlePage().
			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag',
					array( $this, $pageInfo, $revisionInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Builds a WikiRevision from collected revision data and passes it to the
	 * revision callback. A missing timestamp defaults to the current time.
	 *
	 * @param $pageInfo array Page data; '_title' is used as the revision title
	 * @param $revisionInfo array Data collected by handleRevision()
	 * @return mixed Result of revisionCallback()
	 */
	private function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		if( isset( $revisionInfo['id'] ) ) {
			$revision->setID( $revisionInfo['id'] );
		}
		if ( isset( $revisionInfo['text'] ) ) {
			$revision->setText( $revisionInfo['text'] );
		}
		if ( isset( $revisionInfo['model'] ) ) {
			$revision->setModel( $revisionInfo['model'] );
		}
		if ( isset( $revisionInfo['format'] ) ) {
			$revision->setFormat( $revisionInfo['format'] );
		}
		$revision->setTitle( $pageInfo['_title'] );

		if ( isset( $revisionInfo['timestamp'] ) ) {
			$revision->setTimestamp( $revisionInfo['timestamp'] );
		} else {
			$revision->setTimestamp( wfTimestampNow() );
		}

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		// The mere presence of the tag marks the edit as minor.
		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}
		if ( isset(
$revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return $this->revisionCallback( $revision );
	}

	/**
	 * Reads one <upload> element and, when upload import is enabled, passes
	 * the collected data to processUpload().
	 *
	 * Base64 <contents> are written to a temporary file. A readable file
	 * under the image base path named by 'rel' takes precedence over it.
	 *
	 * @param $pageInfo array Page data
	 * @return mixed Result of processUpload(), or null when uploads are not imported
	 */
	private function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
			'src', 'size', 'sha1base36', 'archivename', 'rel' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'upload' ) {
				break;
			}

			$tag = $this->reader->name;

			// Hook arguments must be passed as a single array, matching the
			// ImportHandlePageXMLTag call in handlePage().
			if ( !wfRunHooks( 'ImportHandleUploadXMLTag',
					array( $this, $pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contents' ) {
				// Read the attribute while the reader is still on the start
				// tag; nodeContents() advances it to the end of the element.
				$encoding = $this->reader->getAttribute( 'encoding' );
				$contents = $this->nodeContents();
				if ( $encoding === 'base64' ) {
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
					$uploadInfo['isTempSrc'] = true;
				}
			} elseif ( $tag == 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
			$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
			if ( file_exists( $path ) ) {
				$uploadInfo['fileSrc'] = $path;
				$uploadInfo['isTempSrc'] = false;
			}
		}

		if ( $this->mImportUploads ) {
			return
$this->processUpload( $pageInfo, $uploadInfo );
		}
	}

	/**
	 * Writes the given data to a newly created temporary file.
	 *
	 * @param $contents string Raw file contents
	 * @return string Path of the temporary file
	 */
	private function dumpTemp( $contents ) {
		$tmpPath = tempnam( wfTempDir(), 'importupload' );
		file_put_contents( $tmpPath, $contents );
		return $tmpPath;
	}

	/**
	 * Builds a WikiRevision describing an upload and passes it to the upload
	 * callback.
	 *
	 * @param $pageInfo array Page data; '_title' and 'id' are used
	 * @param $uploadInfo array Data collected by handleUpload()
	 * @return mixed Result of the upload callback
	 */
	private function processUpload( $pageInfo, $uploadInfo ) {
		$upload = new WikiRevision;

		$upload->setTitle( $pageInfo['_title'] );
		$upload->setID( $pageInfo['id'] );
		$upload->setTimestamp( $uploadInfo['timestamp'] );
		// The description text is optional and defaults to an empty string.
		$upload->setText( isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '' );
		$upload->setFilename( $uploadInfo['filename'] );
		if ( isset( $uploadInfo['archivename'] ) ) {
			$upload->setArchiveName( $uploadInfo['archivename'] );
		}
		$upload->setSrc( $uploadInfo['src'] );
		if ( isset( $uploadInfo['fileSrc'] ) ) {
			$isTemp = !empty( $uploadInfo['isTempSrc'] );
			$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemp );
		}
		if ( isset( $uploadInfo['sha1base36'] ) ) {
			$upload->setSha1Base36( $uploadInfo['sha1base36'] );
		}
		$upload->setSize( intval( $uploadInfo['size'] ) );
		$upload->setComment( $uploadInfo['comment'] );

		// When both are present the username is applied last and wins.
		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$upload->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$upload->setUserName( $uploadInfo['contributor']['username'] );
		}
		$upload->setNoUpdates( $this->mNoUpdates );

		return call_user_func( $this->mUploadCallback, $upload );
	}

	/**
	 * Reads one <contributor> element.
	 *
	 * @return array Any of the keys 'id', 'ip' and 'username' that were present
	 */
	private function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'contributor') {
				break;
			}

			$tag = $this->reader->name;

if ( in_array( $tag, $fields ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

	/**
	 * Turns a title string from the dump into the title to import to,
	 * applying the target namespace or root page when one is configured.
	 *
	 * A notice is emitted and false returned when the resulting title is
	 * invalid, external, cannot exist, or (outside command line mode) may not
	 * be edited or created by the user.
	 *
	 * @param $text string Title text from the dump
	 * @return array|bool array( $title, $origTitle ), or false on failure
	 */
	private function processTitle( $text ) {
		global $wgCommandLineMode;

		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );

		if( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			# makeTitleSafe, because $origTitle can have an interwiki prefix (different
			# interwiki map settings), and then the dbKey can begin with a lowercase char
			$title = Title::makeTitleSafe( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			if( !is_null( $this->mTargetRootPage ) ) {
				$workTitle = $this->mTargetRootPage . '/' . $workTitle;
			}
			$title = Title::newFromText( $workTitle );
		}

		if( is_null( $title ) ) {
			# Invalid page title? Ignore the page
			$this->notice( 'import-error-invalid', $workTitle );
			return false;
		} elseif( $title->isExternal() ) {
			$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
			return false;
		} elseif( !$title->canExist() ) {
			$this->notice( 'import-error-special', $title->getPrefixedText() );
			return false;
		} elseif( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot edit this page
			$this->notice( 'import-error-edit', $title->getPrefixedText() );
			return false;
		} elseif( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot create this page
			$this->notice( 'import-error-create', $title->getPrefixedText() );
			return false;
		}

		return array( $title, $origTitle );
	}
}

/**
 * Stream wrapper that exposes a registered source object as a readable
 * stream, used by WikiImporter under the "uploadsource" protocol.
 */
class UploadSourceAdapter {
	/** Registered sources, keyed by the id returned from registerSource() */
	static $sourceRegistrations = array();

	private $mSource;
	// Start with an empty buffer and position zero so the stream methods
	// return well-defined values before the first read.
	private $mBuffer = '';
	private $mPosition = 0;

	/**
	 * Registers a source and returns the id under which it can be opened.
	 *
	 * @param $source Object providing atEnd() and readChunk()
	 * @return string
	 */
	static
function registerSource( $source ) {
		$id = wfRandomString();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Opens the source whose id is the host part of the given URL.
	 *
	 * @param $path string URL of the form "uploadsource://<id>"
	 * @param $mode string Unused
	 * @param $options int Unused
	 * @param $opened_path string Unused
	 * @return bool False when the URL cannot be parsed or the id is unknown
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );

		// parse_url() returns false for malformed URLs and omits 'host'
		// when the URL has none.
		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			return false;
		}
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Returns up to $count bytes, pulling chunks from the source until
	 * enough are buffered, the source is at its end, or a chunk is empty.
	 *
	 * Lengths are byte counts on purpose: this is a raw byte stream.
	 *
	 * @param $count int Maximum number of bytes to return
	 * @return string
	 */
	function stream_read( $count ) {
		// The buffer may be null (never filled) or false (a substr() past
		// the end on older PHP versions); normalise it to a string.
		$buffer = (string)$this->mBuffer;

		while ( !$this->mSource->atEnd() && strlen( $buffer ) < $count ) {
			$read = $this->mSource->readChunk();

			if ( !strlen( $read ) ) {
				break;
			}

			$buffer .= $read;
		}

		$return = (string)substr( $buffer, 0, $count );
		$this->mBuffer = (string)substr( $buffer, $count );

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * Writing is not supported.
	 *
	 * @param $data string
	 * @return bool Always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes handed out so far
	 */
	function stream_tell() {
		return (int)$this->mPosition;
	}

	/**
	 * Reports end-of-stream only once the source is exhausted AND every
	 * buffered byte has been handed out; otherwise data read ahead by
	 * stream_read() could be dropped.
	 *
	 * @return bool
	 */
	function stream_eof() {
		return strlen( (string)$this->mBuffer ) === 0 && $this->mSource->atEnd();
	}

	/**
	 * Returns a stat array in which every field is zero.
	 *
	 * @return array
	 */
	function url_stat() {
		$fields = array( 'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
			'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks' );
		$result = array();

		// Each field is exposed both by name and by numeric index.
		foreach ( $fields as $index => $name ) {
			$result[$name] = $result[$index] = 0;
		}

		return $result;
	}
}

class XMLReader2 extends XMLReader {

	/**
	 * Collects the text of the element the reader is positioned on.
	 *
	 * @return string
	 */
	function
nodeContents() {
		if( $this->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while( $this->read() ) {
			switch( $this->nodeType ) {
			case XmlReader::TEXT:
			case XmlReader::CDATA:
			case XmlReader::SIGNIFICANT_WHITESPACE:
				// CDATA is included so wrapped content is not lost.
				$buffer .= $this->value;
				break;
			case XmlReader::END_ELEMENT:
				return $buffer;
			}
		}

		// Input ended before an end tag: close the reader and return an empty
		// string, as WikiImporter::nodeContents() does, instead of leaking
		// close()'s boolean result to callers expecting text.
		$this->close();
		return '';
	}
}

/**
 * Holds the data of a single imported revision, upload or log item.
 */
class WikiRevision {
	var $importer = null;

	var $title = null;
	var $id = 0;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $model = null;
	var $format = null;
	var $text = "";
	var $content = null;
	var $comment = "";
	var $minor = false;
	var $type = "";
	var $action = "";
	var $params = "";
	var $fileSrc = '';
	var $sha1base36 = false;
	var $isTemp = false;
	var $archiveName = '';
	var $fileIsTemp;
	// Written by setSrc(), setFilename() and setSize(); declared here so the
	// matching getters do not read undefined properties.
	var $src = null;
	var $filename = null;
	var $size = null;
	private $mNoUpdates = false;

	/**
	 * @param $title Title
	 * @throws MWException When $title is not an object
	 */
	function setTitle( $title ) {
		if( is_object( $title ) ) {
			$this->title = $title;
		} elseif( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import."
);
		}
	}

	/**
	 * @param $id int
	 */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Stores the timestamp converted to TS_MW format by wfTimestamp().
	 * @param $ts string e.g. 2003-08-05T18:30:02Z
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param $user string User name
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * Stores an IP address as the user text (same field as setUsername()).
	 * @param $ip string
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param $model string Content model
	 */
	function setModel( $model ) {
		$this->model = $model;
	}

	/**
	 * @param $format string Content format
	 */
	function setFormat( $format ) {
		$this->format = $format;
	}

	/**
	 * @param $text string
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param $text string
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param $minor mixed Cast to bool
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @param $src mixed
	 */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * Sets the local file to import from.
	 *
	 * @param $src string Path of the file
	 * @param $isTemp bool Whether the file is a temporary one
	 */
	function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		$this->fileIsTemp = $isTemp;
		// isTempSrc() reads $isTemp, so keep it in sync; otherwise the flag
		// passed here would never be visible to callers.
		$this->isTemp = $isTemp;
	}

	/**
	 * @param $sha1base36 string
	 */
	function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param $filename string
	 */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param $archiveName string
	 */
	function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param $size mixed Converted with intval()
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param $type string
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param $action string
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param $params mixed
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param $noupdates bool
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Title
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return int
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return string
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string The user text set by setUsername() or setUserIP()
	 */
	function getUser() {
		return $this->user_text;
	}

function getText() { 01218 ContentHandler::deprecated( __METHOD__, '1.21' ); 01219 01220 return $this->text; 01221 } 01222 01226 function getContent() { 01227 if ( is_null( $this->content ) ) { 01228 $this->content = 01229 ContentHandler::makeContent( 01230 $this->text, 01231 $this->getTitle(), 01232 $this->getModel(), 01233 $this->getFormat() 01234 ); 01235 } 01236 01237 return $this->content; 01238 } 01239 01243 function getModel() { 01244 if ( is_null( $this->model ) ) { 01245 $this->model = $this->getTitle()->getContentModel(); 01246 } 01247 01248 return $this->model; 01249 } 01250 01254 function getFormat() { 01255 if ( is_null( $this->model ) ) { 01256 $this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat(); 01257 } 01258 01259 return $this->format; 01260 } 01261 01265 function getComment() { 01266 return $this->comment; 01267 } 01268 01272 function getMinor() { 01273 return $this->minor; 01274 } 01275 01279 function getSrc() { 01280 return $this->src; 01281 } 01282 01286 function getSha1() { 01287 if ( $this->sha1base36 ) { 01288 return wfBaseConvert( $this->sha1base36, 36, 16 ); 01289 } 01290 return false; 01291 } 01292 01296 function getFileSrc() { 01297 return $this->fileSrc; 01298 } 01299 01303 function isTempSrc() { 01304 return $this->isTemp; 01305 } 01306 01310 function getFilename() { 01311 return $this->filename; 01312 } 01313 01317 function getArchiveName() { 01318 return $this->archiveName; 01319 } 01320 01324 function getSize() { 01325 return $this->size; 01326 } 01327 01331 function getType() { 01332 return $this->type; 01333 } 01334 01338 function getAction() { 01339 return $this->action; 01340 } 01341 01345 function getParams() { 01346 return $this->params; 01347 } 01348 01352 function importOldRevision() { 01353 $dbw = wfGetDB( DB_MASTER ); 01354 01355 # Sneak a single revision into place 01356 $user = User::newFromName( $this->getUser() ); 01357 if( $user ) { 01358 $userId = intval( $user->getId() ); 01359 
$userText = $user->getName(); 01360 $userObj = $user; 01361 } else { 01362 $userId = 0; 01363 $userText = $this->getUser(); 01364 $userObj = new User; 01365 } 01366 01367 // avoid memory leak...? 01368 $linkCache = LinkCache::singleton(); 01369 $linkCache->clear(); 01370 01371 $page = WikiPage::factory( $this->title ); 01372 if( !$page->exists() ) { 01373 # must create the page... 01374 $pageId = $page->insertOn( $dbw ); 01375 $created = true; 01376 $oldcountable = null; 01377 } else { 01378 $pageId = $page->getId(); 01379 $created = false; 01380 01381 $prior = $dbw->selectField( 'revision', '1', 01382 array( 'rev_page' => $pageId, 01383 'rev_timestamp' => $dbw->timestamp( $this->timestamp ), 01384 'rev_user_text' => $userText, 01385 'rev_comment' => $this->getComment() ), 01386 __METHOD__ 01387 ); 01388 if( $prior ) { 01389 // @todo FIXME: This could fail slightly for multiple matches :P 01390 wfDebug( __METHOD__ . ": skipping existing revision for [[" . 01391 $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" ); 01392 return false; 01393 } 01394 $oldcountable = $page->isCountable(); 01395 } 01396 01397 # @todo FIXME: Use original rev_id optionally (better for backups) 01398 # Insert the row 01399 $revision = new Revision( array( 01400 'title' => $this->title, 01401 'page' => $pageId, 01402 'content_model' => $this->getModel(), 01403 'content_format' => $this->getFormat(), 01404 'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()? 01405 'comment' => $this->getComment(), 01406 'user' => $userId, 01407 'user_text' => $userText, 01408 'timestamp' => $this->timestamp, 01409 'minor_edit' => $this->minor, 01410 ) ); 01411 $revision->insertOn( $dbw ); 01412 $changed = $page->updateIfNewerOn( $dbw, $revision ); 01413 01414 if ( $changed !== false && !$this->mNoUpdates ) { 01415 wfDebug( __METHOD__ . 
": running updates\n" ); 01416 $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) ); 01417 } 01418 01419 return true; 01420 } 01421 01425 function importLogItem() { 01426 $dbw = wfGetDB( DB_MASTER ); 01427 # @todo FIXME: This will not record autoblocks 01428 if( !$this->getTitle() ) { 01429 wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " . 01430 $this->timestamp . "\n" ); 01431 return; 01432 } 01433 # Check if it exists already 01434 // @todo FIXME: Use original log ID (better for backups) 01435 $prior = $dbw->selectField( 'logging', '1', 01436 array( 'log_type' => $this->getType(), 01437 'log_action' => $this->getAction(), 01438 'log_timestamp' => $dbw->timestamp( $this->timestamp ), 01439 'log_namespace' => $this->getTitle()->getNamespace(), 01440 'log_title' => $this->getTitle()->getDBkey(), 01441 'log_comment' => $this->getComment(), 01442 #'log_user_text' => $this->user_text, 01443 'log_params' => $this->params ), 01444 __METHOD__ 01445 ); 01446 // @todo FIXME: This could fail slightly for multiple matches :P 01447 if( $prior ) { 01448 wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " . 01449 $this->timestamp . 
"\n" ); 01450 return; 01451 } 01452 $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' ); 01453 $data = array( 01454 'log_id' => $log_id, 01455 'log_type' => $this->type, 01456 'log_action' => $this->action, 01457 'log_timestamp' => $dbw->timestamp( $this->timestamp ), 01458 'log_user' => User::idFromName( $this->user_text ), 01459 #'log_user_text' => $this->user_text, 01460 'log_namespace' => $this->getTitle()->getNamespace(), 01461 'log_title' => $this->getTitle()->getDBkey(), 01462 'log_comment' => $this->getComment(), 01463 'log_params' => $this->params 01464 ); 01465 $dbw->insert( 'logging', $data, __METHOD__ ); 01466 } 01467 01471 function importUpload() { 01472 # Construct a file 01473 $archiveName = $this->getArchiveName(); 01474 if ( $archiveName ) { 01475 wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" ); 01476 $file = OldLocalFile::newFromArchiveName( $this->getTitle(), 01477 RepoGroup::singleton()->getLocalRepo(), $archiveName ); 01478 } else { 01479 $file = wfLocalFile( $this->getTitle() ); 01480 wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" ); 01481 if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) { 01482 $archiveName = $file->getTimestamp() . '!' . $file->getName(); 01483 $file = OldLocalFile::newFromArchiveName( $this->getTitle(), 01484 RepoGroup::singleton()->getLocalRepo(), $archiveName ); 01485 wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" ); 01486 } 01487 } 01488 if( !$file ) { 01489 wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" ); 01490 return false; 01491 } 01492 01493 # Get the file source or download if necessary 01494 $source = $this->getFileSrc(); 01495 $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0; 01496 if ( !$source ) { 01497 $source = $this->downloadSource(); 01498 $flags |= File::DELETE_SOURCE; 01499 } 01500 if( !$source ) { 01501 wfDebug( __METHOD__ . 
": Could not fetch remote file.\n" ); 01502 return false; 01503 } 01504 $sha1 = $this->getSha1(); 01505 if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) { 01506 if ( $flags & File::DELETE_SOURCE ) { 01507 # Broken file; delete it if it is a temporary file 01508 unlink( $source ); 01509 } 01510 wfDebug( __METHOD__ . ": Corrupt file $source.\n" ); 01511 return false; 01512 } 01513 01514 $user = User::newFromName( $this->user_text ); 01515 01516 # Do the actual upload 01517 if ( $archiveName ) { 01518 $status = $file->uploadOld( $source, $archiveName, 01519 $this->getTimestamp(), $this->getComment(), $user, $flags ); 01520 } else { 01521 $status = $file->upload( $source, $this->getComment(), $this->getComment(), 01522 $flags, false, $this->getTimestamp(), $user ); 01523 } 01524 01525 if ( $status->isGood() ) { 01526 wfDebug( __METHOD__ . ": Succesful\n" ); 01527 return true; 01528 } else { 01529 wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" ); 01530 return false; 01531 } 01532 } 01533 01537 function downloadSource() { 01538 global $wgEnableUploads; 01539 if( !$wgEnableUploads ) { 01540 return false; 01541 } 01542 01543 $tempo = tempnam( wfTempDir(), 'download' ); 01544 $f = fopen( $tempo, 'wb' ); 01545 if( !$f ) { 01546 wfDebug( "IMPORT: couldn't write to temp file $tempo\n" ); 01547 return false; 01548 } 01549 01550 // @todo FIXME! 
01551 $src = $this->getSrc(); 01552 $data = Http::get( $src ); 01553 if( !$data ) { 01554 wfDebug( "IMPORT: couldn't fetch source $src\n" ); 01555 fclose( $f ); 01556 unlink( $tempo ); 01557 return false; 01558 } 01559 01560 fwrite( $f, $data ); 01561 fclose( $f ); 01562 01563 return $tempo; 01564 } 01565 01566 } 01567 01572 class ImportStringSource { 01573 function __construct( $string ) { 01574 $this->mString = $string; 01575 $this->mRead = false; 01576 } 01577 01581 function atEnd() { 01582 return $this->mRead; 01583 } 01584 01588 function readChunk() { 01589 if( $this->atEnd() ) { 01590 return false; 01591 } 01592 $this->mRead = true; 01593 return $this->mString; 01594 } 01595 } 01596 01601 class ImportStreamSource { 01602 function __construct( $handle ) { 01603 $this->mHandle = $handle; 01604 } 01605 01609 function atEnd() { 01610 return feof( $this->mHandle ); 01611 } 01612 01616 function readChunk() { 01617 return fread( $this->mHandle, 32768 ); 01618 } 01619 01624 static function newFromFile( $filename ) { 01625 wfSuppressWarnings(); 01626 $file = fopen( $filename, 'rt' ); 01627 wfRestoreWarnings(); 01628 if( !$file ) { 01629 return Status::newFatal( "importcantopen" ); 01630 } 01631 return Status::newGood( new ImportStreamSource( $file ) ); 01632 } 01633 01638 static function newFromUpload( $fieldname = "xmlimport" ) { 01639 $upload =& $_FILES[$fieldname]; 01640 01641 if( !isset( $upload ) || !$upload['name'] ) { 01642 return Status::newFatal( 'importnofile' ); 01643 } 01644 if( !empty( $upload['error'] ) ) { 01645 switch($upload['error']){ 01646 case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini. 01647 return Status::newFatal( 'importuploaderrorsize' ); 01648 case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form. 
01649 return Status::newFatal( 'importuploaderrorsize' ); 01650 case 3: # The uploaded file was only partially uploaded 01651 return Status::newFatal( 'importuploaderrorpartial' ); 01652 case 6: #Missing a temporary folder. 01653 return Status::newFatal( 'importuploaderrortemp' ); 01654 # case else: # Currently impossible 01655 } 01656 01657 } 01658 $fname = $upload['tmp_name']; 01659 if( is_uploaded_file( $fname ) ) { 01660 return ImportStreamSource::newFromFile( $fname ); 01661 } else { 01662 return Status::newFatal( 'importnofile' ); 01663 } 01664 } 01665 01671 static function newFromURL( $url, $method = 'GET' ) { 01672 wfDebug( __METHOD__ . ": opening $url\n" ); 01673 # Use the standard HTTP fetch function; it times out 01674 # quicker and sorts out user-agent problems which might 01675 # otherwise prevent importing from large sites, such 01676 # as the Wikimedia cluster, etc. 01677 $data = Http::request( $method, $url, array( 'followRedirects' => true ) ); 01678 if( $data !== false ) { 01679 $file = tmpfile(); 01680 fwrite( $file, $data ); 01681 fflush( $file ); 01682 fseek( $file, 0 ); 01683 return Status::newGood( new ImportStreamSource( $file ) ); 01684 } else { 01685 return Status::newFatal( 'importcantopen' ); 01686 } 01687 } 01688 01697 public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) { 01698 if( $page == '' ) { 01699 return Status::newFatal( 'import-noarticle' ); 01700 } 01701 $link = Title::newFromText( "$interwiki:Special:Export/$page" ); 01702 if( is_null( $link ) || $link->getInterwiki() == '' ) { 01703 return Status::newFatal( 'importbadinterwiki' ); 01704 } else { 01705 $params = array(); 01706 if ( $history ) $params['history'] = 1; 01707 if ( $templates ) $params['templates'] = 1; 01708 if ( $pageLinkDepth ) $params['pagelink-depth'] = $pageLinkDepth; 01709 $url = $link->getFullUrl( $params ); 01710 # For interwikis, use POST to avoid redirects. 
01711 return ImportStreamSource::newFromURL( $url, "POST" ); 01712 } 01713 } 01714 }