MediaWiki
master
|
<?php
/**
 * XML export support: the exporter that queries the database, the writer
 * that renders rows as XML fragments, and the output sinks / filters that
 * the fragments are pushed through.
 *
 * NOTE(review): whitespace inside the XML indentation string literals below
 * is reproduced exactly as it appeared in the listing this file was restored
 * from; that listing may have collapsed runs of spaces. Confirm against the
 * repository copy before relying on the exact indentation of the output.
 */

/**
 * Runs the database queries for a dump and streams the resulting rows
 * through an XmlDumpWriter into an output sink.
 */
class WikiExporter {
	var $list_authors = false; # Return distinct author list (when not returning full history)
	var $author_list = "";

	var $dumpUploads = false;
	var $dumpUploadFileContents = false;

	// Values for the $history constructor argument (tested as bit flags in dumpFrom()).
	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	// Values for the $buffer constructor argument.
	const BUFFER = 0;
	const STREAM = 1;

	// Values for the $text constructor argument.
	const TEXT = 0;
	const STUB = 1;

	/** Database handle used for all queries; set by the constructor. */
	var $db;

	/** One of the history constants above, or an array with 'dir', 'offset' and 'limit' keys. */
	var $history;

	/** XmlDumpWriter instance that renders rows as XML. */
	var $writer;

	/** WikiExporter::BUFFER or WikiExporter::STREAM. */
	var $buffer;

	/** WikiExporter::TEXT or WikiExporter::STUB. */
	var $text;

	/** Output sink receiving the generated XML; a DumpOutput by default. */
	var $sink;

	/**
	 * Returns the export schema version string used in the XML namespace URLs.
	 *
	 * @return string
	 */
	public static function schemaVersion() {
		return "0.8";
	}

	/**
	 * @param $db Database handle; must provide select(), addQuotes(), bufferResults(), etc.
	 * @param $history Mixed: one of WikiExporter::FULL, CURRENT, STABLE, LOGS or RANGE,
	 *   or an associative array with keys 'dir' ('asc' or anything else for descending),
	 *   'offset' (timestamp) and 'limit' (row count)
	 * @param $buffer Int: WikiExporter::BUFFER or WikiExporter::STREAM; STREAM switches
	 *   the connection to unbuffered results for the duration of the dump query
	 * @param $text Int: WikiExporter::TEXT or WikiExporter::STUB; STUB skips the join
	 *   on the text table
	 */
	function __construct( $db, $history = WikiExporter::CURRENT,
			$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
		$this->db = $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replaces the output sink. The sink is bound by reference.
	 *
	 * @param $sink Object providing the write*() methods of DumpOutput
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Writes the stream header produced by the writer to the sink.
	 */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/**
	 * Writes the stream footer produced by the writer to the sink.
	 */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/**
	 * Dumps with no extra condition, i.e. everything the configured
	 * history mode selects.
	 */
	public function allPages() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps pages whose page_id is in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps revisions whose rev_id is in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps the page matching the namespace and DB key of the given title.
	 *
	 * @param $title Title
	 */
	public function pageByTitle( $title ) {
		$this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dumps the page with the given name.
	 *
	 * @param $name String: page name, parsed with Title::newFromText()
	 * @throws MWException if the name does not yield a title
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		} else {
			$this->pageByTitle( $title );
		}
	}

	/**
	 * Dumps each named page in turn.
	 *
	 * @param $names Array of page names
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/**
	 * Dumps with no extra condition; produces log items when the exporter
	 * was constructed with the LOGS history flag.
	 */
	public function allLogs() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps log entries whose log_id is in [$start, $end).
	 *
	 * @param $start Int: inclusive lower bound
	 * @param $end Int: exclusive upper bound; a falsy value means no upper bound
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Builds the <contributors> fragment for the pages matching $cond and
	 * stores it in $this->author_list. Revisions whose user is flagged with
	 * Revision::DELETED_USER are excluded.
	 *
	 * @param $cond String: SQL condition selecting the pages
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";
		// rev_deleted

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// Join revisions to their page; rev_page (not rev_id) holds the page id,
				// matching every other page/revision join in this class.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars() rather than htmlentities(): the output is XML, where
			// HTML named entities such as &eacute; are undefined.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Runs the dump query for the configured history mode and streams the
	 * rows to the sink. With the LOGS flag set, log items are dumped;
	 * otherwise pages and revisions are.
	 *
	 * On any exception the result set is freed and the connection's previous
	 * buffering mode restored before the exception is rethrown.
	 *
	 * @param $cond String: additional SQL condition, or '' for none
	 * @throws MWException when the history mode is not recognised, or when in
	 *   STABLE mode the 'WikiExporter::dumpStableQuery' hook run returns true
	 * @throws Exception anything raised while querying or writing
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		# For logging dumps...
		if ( $this->history & self::LOGS ) {
			$where = array( 'user_id = log_user' );
			# Hide private logs
			$hideLogs = LogEventsList::getExcludeClause( $this->db );
			if ( $hideLogs ) {
				$where[] = $hideLogs;
			}
			# Add on any caller specified conditions
			if ( $cond ) {
				$where[] = $cond;
			}
			# Get logging table name for logging.* clause
			$logging = $this->db->tableName( 'logging' );

			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}
			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				$result = $this->db->select( array( 'logging', 'user' ),
					array( "{$logging}.*", 'user_name' ), // grab the user name
					$where,
					__METHOD__,
					array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
				);
				$wrapper = $this->db->resultObject( $result );
				$this->outputLogStream( $wrapper );
				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.

				// Freeing result
				try {
					if ( $wrapper ) {
						$wrapper->free();
					}
				} catch ( Exception $e2 ) {
					// Already in panic mode -> ignoring $e2 as $e has
					// higher priority
				}

				// Putting database back in previous buffer mode
				try {
					if ( $this->buffer == WikiExporter::STREAM ) {
						$this->db->bufferResults( $prev );
					}
				} catch ( Exception $e2 ) {
					// Already in panic mode -> ignoring $e2 as $e has
					// higher priority
				}

				// Inform caller about problem
				throw $e;
			}
		# For page dumps...
		} else {
			$tables = array( 'page', 'revision' );
			$opts = array( 'ORDER BY' => 'page_id ASC' );
			$opts['USE INDEX'] = array();
			$join = array();
			if ( is_array( $this->history ) ) {
				# Time offset/limit for all pages/history...
				$revJoin = 'page_id=rev_page';
				# Set time order
				if ( $this->history['dir'] == 'asc' ) {
					$op = '>';
					$opts['ORDER BY'] = 'rev_timestamp ASC';
				} else {
					$op = '<';
					$opts['ORDER BY'] = 'rev_timestamp DESC';
				}
				# Set offset
				if ( !empty( $this->history['offset'] ) ) {
					$revJoin .= " AND rev_timestamp $op " .
						$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
				}
				$join['revision'] = array( 'INNER JOIN', $revJoin );
				# Set query limit
				if ( !empty( $this->history['limit'] ) ) {
					$opts['LIMIT'] = intval( $this->history['limit'] );
				}
			} elseif ( $this->history & WikiExporter::FULL ) {
				# Full history dumps...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			} elseif ( $this->history & WikiExporter::CURRENT ) {
				# Latest revision dumps...
				if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
					$this->do_list_authors( $cond );
				}
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			} elseif ( $this->history & WikiExporter::STABLE ) {
				# "Stable" revision dumps...
				# Default JOIN, to be overridden...
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
				# One, and only one hook should set this, and return false
				if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
					wfProfileOut( __METHOD__ );
					throw new MWException( __METHOD__ . " given invalid history dump type." );
				}
			} elseif ( $this->history & WikiExporter::RANGE ) {
				# Dump of revisions within a specified range
				$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
				$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
			} else {
				# Unknown history specification parameter?
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
			# Query optimization hacks
			if ( $cond == '' ) {
				$opts[] = 'STRAIGHT_JOIN';
				$opts['USE INDEX']['page'] = 'PRIMARY';
			}
			# Build text join options
			if ( $this->text != WikiExporter::STUB ) { // 1-pass
				$tables[] = 'text';
				$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
			}

			if ( $this->buffer == WikiExporter::STREAM ) {
				$prev = $this->db->bufferResults( false );
			}

			$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
			try {
				wfRunHooks( 'ModifyExportQuery',
						array( $this->db, &$tables, &$cond, &$opts, &$join ) );

				# Do the query!
				$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
				$wrapper = $this->db->resultObject( $result );
				# Output dump results
				$this->outputPageStream( $wrapper );

				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e ) {
				// Throwing the exception does not reliably free the resultset, and
				// would also leave the connection in unbuffered mode.

				// Freeing result
				try {
					if ( $wrapper ) {
						$wrapper->free();
					}
				} catch ( Exception $e2 ) {
					// Already in panic mode -> ignoring $e2 as $e has
					// higher priority
				}

				// Putting database back in previous buffer mode
				try {
					if ( $this->buffer == WikiExporter::STREAM ) {
						$this->db->bufferResults( $prev );
					}
				} catch ( Exception $e2 ) {
					// Already in panic mode -> ignoring $e2 as $e has
					// higher priority
				}

				// Inform caller about problem
				throw $e;
			}
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Walks a page/revision result set and emits page-open, revision and
	 * page-close fragments to the sink. A new page is started whenever the
	 * namespace or title differs from the previous row, so rows belonging to
	 * one page must be adjacent in the result set.
	 *
	 * The author list (if any) is appended only when closing the final page.
	 *
	 * @param $resultset Iterable of result rows
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			if ( is_null( $last ) ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title != $row->page_title ) {
				if ( isset( $last ) ) {
					// Close the previous page before opening the next one
					$output = '';
					if ( $this->dumpUploads ) {
						$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
					}
					$output .= $this->writer->closePage();
					$this->sink->writeClosePage( $output );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( isset( $last ) ) {
			$output = '';
			if ( $this->dumpUploads ) {
				$output .= $this->writer->writeUploads( $last, $this->dumpUploadFileContents );
			}
			$output .= $this->author_list;
			$output .= $this->writer->closePage();
			$this->sink->writeClosePage( $output );
		}
	}

	/**
	 * Emits one log item fragment to the sink per row.
	 *
	 * @param $resultset Iterable of logging rows
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}

/**
 * Renders database rows and site metadata as XML fragments of the export
 * format. Every method returns a string; nothing is written directly.
 */
class XmlDumpWriter {
	/**
	 * Deprecated alias of WikiExporter::schemaVersion().
	 *
	 * @deprecated since 1.20; use WikiExporter::schemaVersion() instead
	 * @return string
	 */
	function schemaVersion() {
		wfDeprecated( __METHOD__, '1.20' );
		return WikiExporter::schemaVersion();
	}

	/**
	 * Returns the opening <mediawiki> tag (with schema namespace attributes
	 * and content language) followed by the <siteinfo> block.
	 *
	 * @return string
	 */
	function openStream() {
		global $wgLanguageCode;
		$ver = WikiExporter::schemaVersion();
		return Xml::element( 'mediawiki', array(
			'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			                        "http://www.mediawiki.org/xml/export-$ver.xsd", #TODO: how do we get a new version up there?
			'version'            => $ver,
			'xml:lang'           => $wgLanguageCode ),
			null ) .
			"\n" .
			$this->siteInfo();
	}

	/**
	 * Returns the <siteinfo> block: site name, base URL, generator, case
	 * setting and namespace list.
	 *
	 * @return string
	 */
	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		return " <siteinfo>\n " .
			implode( "\n ", $info ) .
			"\n </siteinfo>\n";
	}

	/**
	 * @return string <sitename> element built from $wgSitename
	 */
	function sitename() {
		global $wgSitename;
		return Xml::element( 'sitename', array(), $wgSitename );
	}

	/**
	 * @return string <generator> element built from $wgVersion
	 */
	function generator() {
		global $wgVersion;
		return Xml::element( 'generator', array(), "MediaWiki $wgVersion" );
	}

	/**
	 * @return string <base> element holding the main page's canonical URL
	 */
	function homelink() {
		return Xml::element( 'base', array(), Title::newMainPage()->getCanonicalUrl() );
	}

	/**
	 * @return string <case> element: 'first-letter' when $wgCapitalLinks is
	 *   set, 'case-sensitive' otherwise
	 */
	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return Xml::element( 'case', array(), $sensitivity );
	}

	/**
	 * @return string <namespaces> block with one <namespace> element per
	 *   entry of $wgContLang->getFormattedNamespaces()
	 */
	function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
			$spaces .= ' ' .
				Xml::element( 'namespace',
					array( 'key' => $ns,
						'case' => MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive',
					), $title ) . "\n";
		}
		$spaces .= " </namespaces>";
		return $spaces;
	}

	/**
	 * @return string the closing </mediawiki> tag
	 */
	function closeStream() {
		return "</mediawiki>\n";
	}

	/**
	 * Returns the opening of a <page> element: title, namespace, id, and,
	 * where applicable, redirect target and restrictions. Runs the
	 * 'XmlDumpWriterOpenPage' hook, which may modify the output.
	 *
	 * @param $row Object: row with page_namespace, page_title, page_id,
	 *   page_is_redirect and page_restrictions fields
	 * @return string
	 */
	function openPage( $row ) {
		$out = " <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$out .= ' ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
		$out .= ' ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
		$out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
		if ( $row->page_is_redirect ) {
			$page = WikiPage::factory( $title );
			$redirect = $page->getRedirectTarget();
			if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
				$out .= ' ' . Xml::element( 'redirect', array( 'title' => self::canonicalTitle( $redirect ) ) ) . "\n";
			}
		}

		if ( $row->page_restrictions != '' ) {
			$out .= ' ' . Xml::element( 'restrictions', array(),
				strval( $row->page_restrictions ) ) . "\n";
		}

		wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

		return $out;
	}

	/**
	 * @return string the closing </page> tag
	 */
	function closePage() {
		return " </page>\n";
	}

	/**
	 * Returns a complete <revision> element for a revision row. Fields
	 * flagged in rev_deleted are emitted as empty elements carrying
	 * deleted="deleted". Runs the 'XmlDumpWriterWriteRevision' hook, which
	 * may modify the output.
	 *
	 * @param $row Object: revision row, optionally joined with text
	 * @return string
	 */
	function writeRevision( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <revision>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
		if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
			$out .= " " . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
		}

		$out .= $this->writeTimestamp( $row->rev_timestamp );

		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_USER ) ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
		}

		if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
			$out .= " <minor/>\n";
		}
		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_COMMENT ) ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->rev_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
		}

		$text = '';
		if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( isset( $row->old_text ) ) {
			// Raw text from the database may have invalid chars
			$text = strval( Revision::getRevisionText( $row ) );
			$out .= " " . Xml::elementClean( 'text',
				array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
				strval( $text ) ) . "\n";
		} else {
			// Stub output
			$out .= " " . Xml::element( 'text',
				array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
				"" ) . "\n";
		}

		if ( isset( $row->rev_sha1 ) && $row->rev_sha1 && !( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
			$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
		} else {
			$out .= " <sha1/>\n";
		}

		if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
			$content_model = strval( $row->rev_content_model );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			// @todo: test!
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$content_model = ContentHandler::getDefaultModelFor( $title );
		}

		$out .= " " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";

		if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
			$content_format = strval( $row->rev_content_format );
		} else {
			// probably using $wgContentHandlerUseDB = false;
			// @todo: test!
			$content_handler = ContentHandler::getForModelID( $content_model );
			$content_format = $content_handler->getDefaultFormat();
		}

		$out .= " " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

		// Hand hooks a reference to a local alias: taking a reference to
		// $this itself is not supported by newer PHP versions.
		$writer = $this;
		wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

		$out .= " </revision>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Returns a complete <logitem> element for a logging row. Fields flagged
	 * in log_deleted are emitted as empty elements carrying deleted="deleted".
	 *
	 * @param $row Object: logging row joined with user_name
	 * @return string
	 */
	function writeLogItem( $row ) {
		wfProfileIn( __METHOD__ );

		$out = " <logitem>\n";
		$out .= " " . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";

		$out .= $this->writeTimestamp( $row->log_timestamp, " " );

		if ( $row->log_deleted & LogPage::DELETED_USER ) {
			$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$out .= $this->writeContributor( $row->log_user, $row->user_name, " " );
		}

		if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
			$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
		} elseif ( $row->log_comment != '' ) {
			$out .= " " . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
		}

		$out .= " " . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
		$out .= " " . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

		if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
			$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
		} else {
			$title = Title::makeTitle( $row->log_namespace, $row->log_title );
			$out .= " " . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $title ) ) . "\n";
			$out .= " " . Xml::elementClean( 'params',
				array( 'xml:space' => 'preserve' ),
				strval( $row->log_params ) ) . "\n";
		}

		$out .= " </logitem>\n";

		wfProfileOut( __METHOD__ );
		return $out;
	}

	/**
	 * Returns a <timestamp> element in ISO 8601 format.
	 *
	 * @param $timestamp Mixed: any timestamp accepted by wfTimestamp()
	 * @param $indent String: prefix written before the element
	 * @return string
	 */
	function writeTimestamp( $timestamp, $indent = " " ) {
		$ts = wfTimestamp( TS_ISO_8601, $timestamp );
		return $indent . Xml::element( 'timestamp', null, $ts ) . "\n";
	}

	/**
	 * Returns a <contributor> element. A <username>/<id> pair is written
	 * when $id is truthy or $text is not a valid IP address; otherwise an
	 * <ip> element is written.
	 *
	 * @param $id Int: user id
	 * @param $text String: user name or IP address
	 * @param $indent String: prefix written before each line
	 * @return string
	 */
	function writeContributor( $id, $text, $indent = " " ) {
		$out = $indent . "<contributor>\n";
		if ( $id || !IP::isValid( $text ) ) {
			$out .= $indent . " " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
			$out .= $indent . " " . Xml::element( 'id', null, strval( $id ) ) . "\n";
		} else {
			$out .= $indent . " " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
		}
		$out .= $indent . "</contributor>\n";
		return $out;
	}

	/**
	 * Returns <upload> elements for every version of the file attached to a
	 * page in the file namespace: the history in reversed getHistory() order,
	 * then the current version. Returns '' for pages outside NS_FILE or
	 * without an existing local file.
	 *
	 * @param $row Object: page row with page_namespace and page_title
	 * @param $dumpContents Bool: embed base64-encoded file contents
	 * @return string
	 */
	function writeUploads( $row, $dumpContents = false ) {
		if ( $row->page_namespace == NS_FILE ) {
			$img = wfLocalFile( $row->page_title );
			if ( $img && $img->exists() ) {
				$out = '';
				foreach ( array_reverse( $img->getHistory() ) as $ver ) {
					$out .= $this->writeUpload( $ver, $dumpContents );
				}
				$out .= $this->writeUpload( $img, $dumpContents );
				return $out;
			}
		}
		return '';
	}

	/**
	 * Returns a single <upload> element for one file version.
	 *
	 * @param $file File
	 * @param $dumpContents Bool: embed base64-encoded file contents
	 * @return string
	 */
	function writeUpload( $file, $dumpContents = false ) {
		if ( $file->isOld() ) {
			$archiveName = " " .
				Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
		} else {
			$archiveName = '';
		}
		if ( $dumpContents ) {
			# Dump file as base64
			# Uses only XML-safe characters, so does not need escaping
			$contents = ' <contents encoding="base64">' .
				chunk_split( base64_encode( file_get_contents( $file->getPath() ) ) ) .
				" </contents>\n";
		} else {
			$contents = '';
		}
		if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
			$comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
		} else {
			$comment = Xml::elementClean( 'comment', null, $file->getDescription() );
		}
		return " <upload>\n" .
			$this->writeTimestamp( $file->getTimestamp() ) .
			$this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
			" " . $comment . "\n" .
			" " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
			$archiveName .
			" " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
			" " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
			" " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
			" " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
			$contents .
			" </upload>\n";
	}

	/**
	 * Returns the title text used in dumps: the prefixed text for interwiki
	 * titles, otherwise the content-language namespace name (underscores
	 * replaced by spaces), a colon when that name is non-empty, and the
	 * title text.
	 *
	 * @param $title Title
	 * @return string
	 */
	public static function canonicalTitle( Title $title ) {
		if ( $title->getInterwiki() ) {
			return $title->getPrefixedText();
		}

		global $wgContLang;
		$prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );

		if ( $prefix !== '' ) {
			$prefix .= ':';
		}

		return $prefix . $title->getText();
	}
}


/**
 * Base output sink: prints everything it is given. Subclasses override
 * write() and the close/rename methods.
 */
class DumpOutput {

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $page Object: page row (unused here)
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev Object: revision row (unused here)
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev Object: logging row (unused here)
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Prints the string; override to redirect output.
	 *
	 * @param $string string
	 */
	function write( $string ) {
		print $string;
	}

	/**
	 * No-op in the base class; file-backed sinks override it.
	 *
	 * @param $newname mixed
	 */
	function closeRenameAndReopen( $newname ) {
		return;
	}

	/**
	 * No-op in the base class; file-backed sinks override it.
	 *
	 * @param $newname mixed
	 * @param $open bool
	 */
	function closeAndRename( $newname, $open = false ) {
		return;
	}

	/**
	 * @return null the base sink is not backed by a file
	 */
	function getFilenames() {
		return null;
	}
}

/**
 * Sink that writes to a file opened with fopen().
 */
class DumpFileOutput extends DumpOutput {
	protected $handle = false, $filename;

	/**
	 * @param $file String: path of the file to (over)write
	 */
	function __construct( $file ) {
		$this->handle = fopen( $file, "wt" );
		$this->filename = $file;
	}

	/**
	 * Writes the closing fragment, then closes the file handle.
	 *
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->handle ) {
			fclose( $this->handle );
			$this->handle = false;
		}
	}

	/**
	 * @param $string string
	 */
	function write( $string ) {
		fputs( $this->handle, $string );
	}

	/**
	 * @param $newname String|array: see closeAndRename()
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Renames the output file.
	 *
	 * @param $newname String: destination path
	 * @throws MWException when rename() fails
	 */
	function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Unwraps a rename target given as an array.
	 *
	 * @param $newname String|array: a name, or an array holding one name
	 * @return mixed the single name
	 * @throws MWException when the array holds more than one element
	 */
	function checkRenameArgCount( $newname ) {
		if ( is_array( $newname ) ) {
			if ( count( $newname ) > 1 ) {
				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
			} else {
				$newname = $newname[0];
			}
		}
		return $newname;
	}

	/**
	 * Closes the file, renames it to $newname and, if requested, opens a
	 * fresh file under the original name. Does nothing when $newname is falsy.
	 *
	 * @param $newname String|array
	 * @param $open Bool: reopen the original filename afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$this->handle = fopen( $this->filename, "wt" );
			}
		}
	}

	/**
	 * @return string the output file name
	 */
	function getFilenames() {
		return $this->filename;
	}
}

/**
 * Sink that pipes output into the stdin of an external command started
 * with proc_open(), optionally redirecting the command's stdout to a file.
 */
class DumpPipeOutput extends DumpFileOutput {
	protected $command, $filename;
	protected $procOpenResource = false;

	/**
	 * @param $command String: shell command to run
	 * @param $file String|null: file to redirect the command's stdout to
	 */
	function __construct( $command, $file = null ) {
		if ( !is_null( $file ) ) {
			$command .= " > " . wfEscapeShellArg( $file );
		}

		$this->startCommand( $command );
		$this->command = $command;
		$this->filename = $file;
	}

	/**
	 * Writes the closing fragment, closes the pipe (via the parent) and
	 * then closes the process.
	 *
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->procOpenResource ) {
			proc_close( $this->procOpenResource );
			$this->procOpenResource = false;
		}
	}

	/**
	 * Starts the command and keeps the write end of its stdin pipe as the
	 * output handle.
	 *
	 * @param $command String: full shell command
	 */
	function startCommand( $command ) {
		$spec = array(
			0 => array( "pipe", "r" ),
		);
		$pipes = array();
		$this->procOpenResource = proc_open( $command, $spec, $pipes );
		$this->handle = $pipes[0];
	}

	/**
	 * @param $newname String|array: see closeAndRename()
	 */
	function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Closes the pipe and the process, renames the output file and, if
	 * requested, restarts the command. Does nothing when $newname is falsy.
	 *
	 * @param $newname String|array
	 * @param $open Bool: restart the command afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				// NOTE(review): $this->command already includes the "> file"
				// redirect when a file was given to the constructor, so the
				// redirect ends up on the command line twice here. Left as-is;
				// confirm whether that is intended.
				$command = $this->command;
				$command .= " > " . wfEscapeShellArg( $this->filename );
				$this->startCommand( $command );
			}
		}
	}

}

/**
 * Sink that pipes output through "gzip" into a file.
 */
class DumpGZipOutput extends DumpPipeOutput {

	/**
	 * @param $file String: destination file
	 */
	function __construct( $file ) {
		parent::__construct( "gzip", $file );
	}
}

/**
 * Sink that pipes output through "bzip2" into a file.
 */
class DumpBZip2Output extends DumpPipeOutput {

	/**
	 * @param $file String: destination file
	 */
	function __construct( $file ) {
		parent::__construct( "bzip2", $file );
	}
}

/**
 * Sink that pipes output into a "7za" process which writes the archive
 * file itself.
 */
class Dump7ZipOutput extends DumpPipeOutput {

	/**
	 * @param $file String: destination archive
	 */
	function __construct( $file ) {
		$command = $this->setup7zCommand( $file );
		parent::__construct( $command );
		// The parent was given no file (7za writes it directly), so record it here
		$this->filename = $file;
	}

	/**
	 * Builds the 7za command line for the given archive.
	 *
	 * @param $file String: destination archive
	 * @return string
	 */
	function setup7zCommand( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Suppress annoying useless crap from p7zip
		// Unfortunately this could suppress real error messages too
		$command .= ' >' . wfGetNull() . ' 2>&1';
		return( $command );
	}

	/**
	 * Closes the pipe and the process, renames the archive and, if
	 * requested, starts a new 7za process for the original filename.
	 * Does nothing when $newname is falsy.
	 *
	 * @param $newname String|array
	 * @param $open Bool: restart the command afterwards
	 */
	function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			fclose( $this->handle );
			proc_close( $this->procOpenResource );
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->setup7zCommand( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}

/**
 * Pass-through filter placed in front of a sink. Subclasses override
 * pass() to drop whole pages.
 */
class DumpFilter {

	/**
	 * The sink (or next filter) that output is forwarded to.
	 */
	public $sink;

	/**
	 * Whether the page currently being written passed the filter.
	 */
	protected $sendingThisPage;

	/**
	 * @param $sink Object: downstream sink, bound by reference
	 */
	function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	/**
	 * Decides via pass() whether this page is forwarded, and forwards the
	 * opening fragment if so.
	 *
	 * @param $page Object: page row
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->sendingThisPage = $this->pass( $page, $string );
		if ( $this->sendingThisPage ) {
			$this->sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeClosePage( $string );
			$this->sendingThisPage = false;
		}
	}

	/**
	 * @param $rev Object: revision row
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		if ( $this->sendingThisPage ) {
			$this->sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Forwards a log item unconditionally; page filtering does not apply.
	 *
	 * @param $rev Object: logging row
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		// Forward to the sink's log item handler, not its revision handler
		$this->sink->writeLogItem( $rev, $string );
	}

	/**
	 * @param $newname mixed
	 */
	function closeRenameAndReopen( $newname ) {
		$this->sink->closeRenameAndReopen( $newname );
	}

	/**
	 * @param $newname mixed
	 * @param $open bool
	 */
	function closeAndRename( $newname, $open = false ) {
		$this->sink->closeAndRename( $newname, $open );
	}

	/**
	 * @return mixed whatever the downstream sink reports
	 */
	function getFilenames() {
		return $this->sink->getFilenames();
	}

	/**
	 * Override to decide whether a page is forwarded.
	 *
	 * @param $page Object: page row
	 * @return bool always true in the base class
	 */
	function pass( $page ) {
		return true;
	}
}

/**
 * Filter that drops pages in talk namespaces.
 */
class DumpNotalkFilter extends DumpFilter {

	/**
	 * @param $page Object: page row
	 * @return bool
	 */
	function pass( $page ) {
		return !MWNamespace::isTalk( $page->page_namespace );
	}
}

/**
 * Filter that keeps (or, when inverted, drops) pages in a given set of
 * namespaces.
 */
class DumpNamespaceFilter extends DumpFilter {
	var $invert = false;
	var $namespaces = array();

	/**
	 * @param $sink Object: downstream sink
	 * @param $param String: comma-separated namespace constant names
	 *   (e.g. "NS_MAIN") and/or numeric ids; a leading "!" inverts the match
	 * @throws MWException for a key that is neither a known constant name
	 *   nor numeric
	 */
	function __construct( &$sink, $param ) {
		parent::__construct( $sink );

		$constants = array(
			"NS_MAIN"           => NS_MAIN,
			"NS_TALK"           => NS_TALK,
			"NS_USER"           => NS_USER,
			"NS_USER_TALK"      => NS_USER_TALK,
			"NS_PROJECT"        => NS_PROJECT,
			"NS_PROJECT_TALK"   => NS_PROJECT_TALK,
			"NS_FILE"           => NS_FILE,
			"NS_FILE_TALK"      => NS_FILE_TALK,
			"NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK"     => NS_IMAGE_TALK,
			"NS_MEDIAWIKI"      => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE"       => NS_TEMPLATE,
			"NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
			"NS_HELP"           => NS_HELP,
			"NS_HELP_TALK"      => NS_HELP_TALK,
			"NS_CATEGORY"       => NS_CATEGORY,
			"NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

		// substr() instead of the removed "$param{0}" offset syntax; it is
		// also safe for an empty string.
		if ( substr( $param, 0, 1 ) === '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach ( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if ( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
				$this->namespaces[$ns] = true;
			} elseif ( is_numeric( $key ) ) {
				$ns = intval( $key );
				$this->namespaces[$ns] = true;
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
		}
	}

	/**
	 * @param $page Object: page row
	 * @return bool true when the page's namespace is in the set, or not in
	 *   the set if the filter is inverted
	 */
	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );
		return ( $this->invert xor $match );
	}
}


/**
 * Filter that forwards only the revision whose rev_id equals the page's
 * page_latest. Output for a page is held back until the page is closed.
 */
class DumpLatestFilter extends DumpFilter {
	var $page, $pageString, $rev, $revString;

	/**
	 * Remembers the page; nothing is forwarded yet.
	 *
	 * @param $page Object: page row
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		$this->page = $page;
		$this->pageString = $string;
	}

	/**
	 * Forwards the remembered page and revision, if a matching revision was
	 * seen, then resets the held state.
	 *
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		if ( $this->rev ) {
			$this->sink->writeOpenPage( $this->page, $this->pageString );
			$this->sink->writeRevision( $this->rev, $this->revString );
			$this->sink->writeClosePage( $string );
		}
		$this->rev = null;
		$this->revString = null;
		$this->page = null;
		$this->pageString = null;
	}

	/**
	 * Remembers the revision if it is the page's latest one.
	 *
	 * @param $rev Object: revision row
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		if ( $rev->rev_id == $this->page->page_latest ) {
			$this->rev = $rev;
			$this->revString = $string;
		}
	}
}

/**
 * Sink that fans every write out to several sinks.
 */
class DumpMultiWriter {

	/** List of downstream sinks, indexed from 0. */
	var $sinks;

	/** Number of downstream sinks. */
	var $count;

	/**
	 * @param $sinks Array: 0-indexed list of sinks
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenStream( $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeCloseStream( $string );
		}
	}

	/**
	 * @param $page Object: page row
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeClosePage( $string );
		}
	}

	/**
	 * @param $rev Object: revision row
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeRevision( $rev, $string );
		}
	}

	/**
	 * Fans a log item out to every sink, so log dumps work with multiple
	 * outputs just as page dumps do.
	 *
	 * @param $rev Object: logging row
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->writeLogItem( $rev, $string );
		}
	}

	/**
	 * @param $newnames Array: one new name per sink, in sink order
	 */
	function closeRenameAndReopen( $newnames ) {
		$this->closeAndRename( $newnames, true );
	}

	/**
	 * @param $newnames Array: one new name per sink, in sink order
	 * @param $open Bool: reopen each sink afterwards
	 */
	function closeAndRename( $newnames, $open = false ) {
		for ( $i = 0; $i < $this->count; $i++ ) {
			$this->sinks[$i]->closeAndRename( $newnames[$i], $open );
		}
	}

	/**
	 * @return array the getFilenames() result of each sink, in sink order
	 */
	function getFilenames() {
		$filenames = array();
		for ( $i = 0; $i < $this->count; $i++ ) {
			$filenames[] = $this->sinks[$i]->getFilenames();
		}
		return $filenames;
	}

}

/**
 * Cleans a string with UtfNormal::cleanUp() and escapes it with
 * htmlspecialchars() for inclusion in XML output.
 *
 * @param $string string
 * @return string
 */
function xmlsafe( $string ) {
	wfProfileIn( __FUNCTION__ );

	$string = UtfNormal::cleanUp( $string );

	$string = htmlspecialchars( $string );
	wfProfileOut( __FUNCTION__ );
	return $string;
}