MediaWiki  master
Export.php
Go to the documentation of this file.
00001 <?php
/**
 * Exports pages, revisions or log entries from the wiki database as an XML dump.
 *
 * Rows are read through the database handle given to the constructor, turned
 * into XML fragments by an XmlDumpWriter, and handed to a DumpOutput sink
 * (replaceable through setOutputSink()).
 */
class WikiExporter {
	/** @var bool Return distinct author list (when not returning full history) */
	public $list_authors = false;

	/** @var string "<contributors>" fragment built by do_list_authors() */
	public $author_list = "";

	/** @var bool Whether outputPageStream() asks the writer for upload records */
	public $dumpUploads = false;

	/** @var bool Passed to XmlDumpWriter::writeUploads() as its $dumpContents argument */
	public $dumpUploadFileContents = false;

	// Values for $history (bit flags)
	const FULL = 1;
	const CURRENT = 2;
	const STABLE = 4; // extension defined
	const LOGS = 8;
	const RANGE = 16;

	// Values for $buffer
	const BUFFER = 0;
	const STREAM = 1;

	// Values for $text
	const TEXT = 0;
	const STUB = 1;

	/** @var mixed Database handle used for every query */
	public $db;

	/** @var int|array One of the history flags above, or an offset/limit array */
	public $history;

	/** @var int WikiExporter::BUFFER or WikiExporter::STREAM */
	public $buffer;

	/** @var int WikiExporter::TEXT or WikiExporter::STUB */
	public $text;

	/** @var XmlDumpWriter Renders rows into XML fragments */
	public $writer;

	/** @var DumpOutput Receives the rendered fragments */
	public $sink;

	/**
	 * Returns the export schema version.
	 *
	 * @return string
	 */
	public static function schemaVersion() {
		return "0.8";
	}

	/**
	 * @param mixed $db Database handle to read from
	 * @param int|array $history One of WikiExporter::FULL, CURRENT, STABLE,
	 *   LOGS or RANGE, or an array with the keys 'dir' ('asc' for ascending,
	 *   anything else for descending) and optionally 'offset' (timestamp) and
	 *   'limit' (row count)
	 * @param int $buffer WikiExporter::BUFFER or WikiExporter::STREAM; with
	 *   STREAM, result buffering is switched off on $db while a dump query runs
	 * @param int $text WikiExporter::TEXT or WikiExporter::STUB; with STUB the
	 *   text table is not joined
	 */
	function __construct( $db, $history = self::CURRENT,
			$buffer = self::BUFFER, $text = self::TEXT ) {
		$this->db = $db;
		$this->history = $history;
		$this->buffer = $buffer;
		$this->writer = new XmlDumpWriter();
		$this->sink = new DumpOutput();
		$this->text = $text;
	}

	/**
	 * Replaces the output sink. The sink is bound by reference.
	 *
	 * @param object $sink Object offering the write*() methods called by this class
	 */
	public function setOutputSink( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Writes the stream header produced by the writer to the sink.
	 */
	public function openStream() {
		$output = $this->writer->openStream();
		$this->sink->writeOpenStream( $output );
	}

	/**
	 * Writes the stream footer produced by the writer to the sink.
	 */
	public function closeStream() {
		$output = $this->writer->closeStream();
		$this->sink->writeCloseStream( $output );
	}

	/**
	 * Dumps every page, with no extra query condition.
	 */
	public function allPages() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps pages whose page_id lies in [$start, $end).
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means no upper bound
	 */
	public function pagesByRange( $start, $end ) {
		$condition = 'page_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND page_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps revisions whose rev_id lies in [$start, $end).
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means no upper bound
	 */
	public function revsByRange( $start, $end ) {
		$condition = 'rev_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND rev_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Dumps the page identified by a Title object.
	 *
	 * @param Title $title
	 */
	public function pageByTitle( $title ) {
		$this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) );
	}

	/**
	 * Dumps the page with the given textual title.
	 *
	 * @param string $name
	 * @throws MWException if $name cannot be turned into a Title
	 */
	public function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if ( is_null( $title ) ) {
			throw new MWException( "Can't export invalid title" );
		}
		$this->pageByTitle( $title );
	}

	/**
	 * Dumps each named page in turn.
	 *
	 * @param array $names List of textual titles
	 */
	public function pagesByName( $names ) {
		foreach ( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	/**
	 * Dumps with no extra query condition; log entries are only produced when
	 * the exporter was constructed with the LOGS history flag.
	 */
	public function allLogs() {
		$this->dumpFrom( '' );
	}

	/**
	 * Dumps log entries whose log_id lies in [$start, $end).
	 *
	 * @param int $start Inclusive lower bound
	 * @param int $end Exclusive upper bound; a falsy value means no upper bound
	 */
	public function logsByRange( $start, $end ) {
		$condition = 'log_id >= ' . intval( $start );
		if ( $end ) {
			$condition .= ' AND log_id < ' . intval( $end );
		}
		$this->dumpFrom( $condition );
	}

	/**
	 * Builds the "<contributors>" fragment in $this->author_list from the
	 * distinct, non-suppressed authors of the pages matching $cond.
	 *
	 * @param string $cond SQL condition selecting the pages
	 */
	protected function do_list_authors( $cond ) {
		wfProfileIn( __METHOD__ );
		$this->author_list = "<contributors>";

		$res = $this->db->select(
			array( 'page', 'revision' ),
			array( 'DISTINCT rev_user_text', 'rev_user' ),
			array(
				// Leave out revisions whose author has been hidden (rev_deleted)
				$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
				$cond,
				// A revision belongs to a page through rev_page; rev_id is the
				// revision's own key and must not be matched against page_id.
				'page_id = rev_page',
			),
			__METHOD__
		);

		foreach ( $res as $row ) {
			// htmlspecialchars() escapes only the XML-significant characters;
			// htmlentities() would emit HTML named entities that XML does not define.
			$this->author_list .= "<contributor>" .
				"<username>" .
				htmlspecialchars( $row->rev_user_text ) .
				"</username>" .
				"<id>" .
				$row->rev_user .
				"</id>" .
				"</contributor>";
		}
		$this->author_list .= "</contributors>";
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Runs the dump query for the configured history mode and streams the
	 * result to the sink.
	 *
	 * @param string $cond Extra SQL condition; '' for none
	 * @throws MWException on an unknown history mode, or when no hook handles
	 *   the STABLE mode
	 * @throws Exception Anything thrown while querying or writing is rethrown
	 *   after the result set and the buffering mode have been cleaned up
	 */
	protected function dumpFrom( $cond = '' ) {
		wfProfileIn( __METHOD__ );
		try {
			// An array-valued $history is an offset/limit page request, never a
			// log dump. Checking is_array() first keeps the bitwise operator
			// away from arrays, which newer PHP versions reject.
			if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
				$this->dumpLogs( $cond, __METHOD__ );
			} else {
				$this->dumpPages( $cond, __METHOD__ );
			}
		} catch ( Exception $e ) {
			// Keep profiler calls balanced on every exit path
			wfProfileOut( __METHOD__ );
			throw $e;
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Queries the logging table and streams the rows as log items.
	 *
	 * @param string $cond Extra SQL condition; '' for none
	 * @param string $fname Caller name passed on to the database layer
	 */
	private function dumpLogs( $cond, $fname ) {
		$where = array( 'user_id = log_user' );
		# Hide private logs
		$hideLogs = LogEventsList::getExcludeClause( $this->db );
		if ( $hideLogs ) {
			$where[] = $hideLogs;
		}
		# Add on any caller specified conditions
		if ( $cond ) {
			$where[] = $cond;
		}
		# Get logging table name for logging.* clause
		$logging = $this->db->tableName( 'logging' );

		$prev = null;
		if ( $this->buffer == self::STREAM ) {
			$prev = $this->db->bufferResults( false );
		}
		$wrapper = null; // stays null if the query itself fails
		try {
			$result = $this->db->select( array( 'logging', 'user' ),
				array( "{$logging}.*", 'user_name' ), // grab the user name
				$where,
				$fname,
				array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
			);
			$wrapper = $this->db->resultObject( $result );
			$this->outputLogStream( $wrapper );
			if ( $this->buffer == self::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e ) {
			$this->cleanUpFailedDump( $wrapper, $prev );
			throw $e;
		}
	}

	/**
	 * Queries page/revision (and, unless in STUB mode, text) rows and streams
	 * them as pages.
	 *
	 * @param string $cond Extra SQL condition; '' for none
	 * @param string $fname Caller name passed on to the database layer and
	 *   used in error messages
	 */
	private function dumpPages( $cond, $fname ) {
		list( $tables, $opts, $join ) = $this->buildPageQuery( $cond, $fname );

		$prev = null;
		if ( $this->buffer == self::STREAM ) {
			$prev = $this->db->bufferResults( false );
		}

		$wrapper = null; // stays null if the hook or the query itself fails
		try {
			wfRunHooks( 'ModifyExportQuery',
				array( $this->db, &$tables, &$cond, &$opts, &$join ) );

			# Do the query!
			$result = $this->db->select( $tables, '*', $cond, $fname, $opts, $join );
			$wrapper = $this->db->resultObject( $result );
			# Output dump results
			$this->outputPageStream( $wrapper );

			if ( $this->buffer == self::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e ) {
			$this->cleanUpFailedDump( $wrapper, $prev );
			throw $e;
		}
	}

	/**
	 * Works out tables, options and join conditions for a page dump.
	 *
	 * In CURRENT mode this also builds the author list when requested.
	 *
	 * @param string $cond Extra SQL condition; '' for none
	 * @param string $fname Caller name used in error messages
	 * @return array array( $tables, $opts, $join )
	 * @throws MWException on an unknown history mode, or when no hook handles
	 *   the STABLE mode
	 */
	private function buildPageQuery( $cond, $fname ) {
		$tables = array( 'page', 'revision' );
		$opts = array( 'ORDER BY' => 'page_id ASC' );
		$opts['USE INDEX'] = array();
		$join = array();

		if ( is_array( $this->history ) ) {
			# Time offset/limit for all pages/history...
			$revJoin = 'page_id=rev_page';
			# Set time order
			if ( $this->history['dir'] == 'asc' ) {
				$op = '>';
				$opts['ORDER BY'] = 'rev_timestamp ASC';
			} else {
				$op = '<';
				$opts['ORDER BY'] = 'rev_timestamp DESC';
			}
			# Set offset
			if ( !empty( $this->history['offset'] ) ) {
				$revJoin .= " AND rev_timestamp $op " .
					$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
			}
			$join['revision'] = array( 'INNER JOIN', $revJoin );
			# Set query limit
			if ( !empty( $this->history['limit'] ) ) {
				$opts['LIMIT'] = intval( $this->history['limit'] );
			}
		} elseif ( $this->history & self::FULL ) {
			# Full history dumps...
			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
		} elseif ( $this->history & self::CURRENT ) {
			# Latest revision dumps...
			if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
				$this->do_list_authors( $cond );
			}
			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
		} elseif ( $this->history & self::STABLE ) {
			# "Stable" revision dumps...
			# Default JOIN, to be overridden...
			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			# One, and only one hook should set this, and return false
			if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
				throw new MWException( $fname . " given invalid history dump type." );
			}
		} elseif ( $this->history & self::RANGE ) {
			# Dump of revisions within a specified range
			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
		} else {
			# Unknown history specification parameter?
			throw new MWException( $fname . " given invalid history dump type." );
		}

		# Query optimization hacks
		if ( $cond == '' ) {
			$opts[] = 'STRAIGHT_JOIN';
			$opts['USE INDEX']['page'] = 'PRIMARY';
		}
		# Build text join options
		if ( $this->text != self::STUB ) { // 1-pass
			$tables[] = 'text';
			$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
		}

		return array( $tables, $opts, $join );
	}

	/**
	 * Best-effort cleanup after a failed dump: frees the result set and
	 * restores the previous buffering mode.
	 *
	 * Throwing alone does not reliably free the result set and would leave the
	 * connection unbuffered. Secondary failures are deliberately ignored
	 * because the caller is about to rethrow the original exception.
	 *
	 * @param object|null $wrapper Result wrapper, or null if none was created
	 * @param mixed $prev Buffering mode returned by the earlier bufferResults() call
	 */
	private function cleanUpFailedDump( $wrapper, $prev ) {
		try {
			if ( $wrapper ) {
				$wrapper->free();
			}
		} catch ( Exception $ignored ) {
			// The original exception has priority
		}

		try {
			if ( $this->buffer == self::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $ignored ) {
			// The original exception has priority
		}
	}

	/**
	 * Streams page/revision rows to the sink, opening a new page whenever the
	 * namespace or title changes between consecutive rows.
	 *
	 * The author list (if any) is appended when the last page is closed.
	 *
	 * @param Traversable|array $resultset Rows with page_* and rev_* fields
	 */
	protected function outputPageStream( $resultset ) {
		$last = null;
		foreach ( $resultset as $row ) {
			if ( $last === null || !$this->isSamePage( $last, $row ) ) {
				if ( $last !== null ) {
					$this->finishPage( $last, '' );
				}
				$output = $this->writer->openPage( $row );
				$this->sink->writeOpenPage( $row, $output );
				$last = $row;
			}
			$output = $this->writer->writeRevision( $row );
			$this->sink->writeRevision( $row, $output );
		}
		if ( $last !== null ) {
			$this->finishPage( $last, $this->author_list );
		}
	}

	/**
	 * Tells whether two rows belong to the same page.
	 *
	 * Titles are compared as strings with ===. A loose comparison would treat
	 * numeric-looking titles such as "1e3" and "1000" as equal and merge two
	 * different pages into one <page> element.
	 *
	 * @param object $a
	 * @param object $b
	 * @return bool
	 */
	private function isSamePage( $a, $b ) {
		return (int)$a->page_namespace === (int)$b->page_namespace
			&& (string)$a->page_title === (string)$b->page_title;
	}

	/**
	 * Writes the closing part of a page to the sink: optional upload records,
	 * then $trailer, then the writer's page footer.
	 *
	 * @param object $row Row of the page being closed
	 * @param string $trailer Extra XML placed just before the page footer
	 */
	private function finishPage( $row, $trailer ) {
		$output = '';
		if ( $this->dumpUploads ) {
			$output .= $this->writer->writeUploads( $row, $this->dumpUploadFileContents );
		}
		$output .= $trailer;
		$output .= $this->writer->closePage();
		$this->sink->writeClosePage( $output );
	}

	/**
	 * Streams log rows to the sink, one log item per row.
	 *
	 * @param Traversable|array $resultset Rows with log_* fields
	 */
	protected function outputLogStream( $resultset ) {
		foreach ( $resultset as $row ) {
			$output = $this->writer->writeLogItem( $row );
			$this->sink->writeLogItem( $row, $output );
		}
	}
}
00469 
00473 class XmlDumpWriter {
/**
 * Returns the export schema version.
 *
 * @deprecated since 1.20; call WikiExporter::schemaVersion() instead
 * @return string
 */
function schemaVersion() {
	wfDeprecated( __METHOD__, '1.20' );
	$version = WikiExporter::schemaVersion();
	return $version;
}
00483 
/**
 * Builds the dump header: the root <mediawiki> element carrying the schema
 * namespace, schema location, version and content language, followed by a
 * newline and the <siteinfo> block.
 *
 * The element is built with null contents; presumably that yields only the
 * opening tag, since closeStream() supplies the closing one (verify against
 * Xml::element()).
 *
 * @return string
 */
function openStream() {
	global $wgLanguageCode;
	$ver = WikiExporter::schemaVersion();

	#TODO: how do we get a new version up there?
	$schemaLocation = "http://www.mediawiki.org/xml/export-$ver/ " .
		"http://www.mediawiki.org/xml/export-$ver.xsd";

	$rootAttribs = array(
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => $schemaLocation,
		'version' => $ver,
		'xml:lang' => $wgLanguageCode,
	);

	$root = Xml::element( 'mediawiki', $rootAttribs, null );
	return $root . "\n" . $this->siteInfo();
}
00508 
/**
 * Builds the <siteinfo> block from the site name, base URL, generator,
 * case setting and namespace list, one entry per line.
 *
 * @return string
 */
function siteInfo() {
	$entries = array();
	$entries[] = $this->sitename();
	$entries[] = $this->homelink();
	$entries[] = $this->generator();
	$entries[] = $this->caseSetting();
	$entries[] = $this->namespaces();

	$body = implode( "\n    ", $entries );
	return "  <siteinfo>\n    " . $body . "\n  </siteinfo>\n";
}
00523 
/**
 * Builds the <sitename> element from $wgSitename.
 *
 * @return string
 */
function sitename() {
	global $wgSitename;
	$noAttribs = array();
	return Xml::element( 'sitename', $noAttribs, $wgSitename );
}
00531 
/**
 * Builds the <generator> element naming the software and $wgVersion.
 *
 * @return string
 */
function generator() {
	global $wgVersion;
	$label = "MediaWiki $wgVersion";
	return Xml::element( 'generator', array(), $label );
}
00539 
/**
 * Builds the <base> element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$url = $mainPage->getCanonicalUrl();
	return Xml::element( 'base', array(), $url );
}
00546 
/**
 * Builds the <case> element: 'first-letter' when $wgCapitalLinks is truthy,
 * otherwise 'case-sensitive'.
 *
 * @return string
 */
function caseSetting() {
	global $wgCapitalLinks;
	// "case-insensitive" option is reserved for future
	if ( $wgCapitalLinks ) {
		$sensitivity = 'first-letter';
	} else {
		$sensitivity = 'case-sensitive';
	}
	return Xml::element( 'case', array(), $sensitivity );
}
00556 
/**
 * Builds the <namespaces> block with one <namespace> element per formatted
 * namespace of the content language. Each element carries the namespace key
 * and its capitalisation rule.
 *
 * @return string
 */
function namespaces() {
	global $wgContLang;

	$lines = array();
	foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
		if ( MWNamespace::isCapitalized( $ns ) ) {
			$case = 'first-letter';
		} else {
			$case = 'case-sensitive';
		}
		$attribs = array( 'key' => $ns, 'case' => $case );
		$lines[] = '      ' . Xml::element( 'namespace', $attribs, $title ) . "\n";
	}

	return "<namespaces>\n" . implode( '', $lines ) . "    </namespaces>";
}
00573 
/**
 * Returns the dump footer: the closing tag of the root element.
 *
 * @return string
 */
function closeStream() {
	$footer = "</mediawiki>\n";
	return $footer;
}
00583 
/**
 * Builds the opening part of a <page> element: title, namespace number and
 * page id, plus a <redirect> element for redirects with a valid target and
 * a <restrictions> element when the row has any.
 *
 * The XmlDumpWriterOpenPage hook may modify the output before it is returned.
 *
 * @param object $row Row with page_namespace, page_title, page_id,
 *   page_is_redirect and page_restrictions
 * @return string
 */
function openPage( $row ) {
	$indent = '    ';
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );

	$parts = array( "  <page>\n" );
	$parts[] = $indent . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
	$parts[] = $indent . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
	$parts[] = $indent . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$redirect = WikiPage::factory( $title )->getRedirectTarget();
		if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
			$redirectAttribs = array( 'title' => self::canonicalTitle( $redirect ) );
			$parts[] = $indent . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$restrictions = strval( $row->page_restrictions );
		$parts[] = $indent . Xml::element( 'restrictions', array(), $restrictions ) . "\n";
	}

	// The hook gets the assembled string by reference
	$out = implode( '', $parts );
	wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

	return $out;
}
00615 
/**
 * Returns the closing tag of a <page> element.
 *
 * @return string
 */
function closePage() {
	$footer = "  </page>\n";
	return $footer;
}
00625 
/**
 * Builds a <revision> element for one revision row.
 *
 * Suppressed user, comment and text fields are replaced by empty elements
 * marked deleted="deleted". When the row carries no old_text field, a stub
 * <text> element referencing rev_text_id is written instead of the content.
 *
 * The XmlDumpWriterWriteRevision hook may modify the output before the
 * closing tag is appended.
 *
 * @param object $row Row with rev_* fields; page_namespace and page_title are
 *   also read when the row has no rev_content_model
 * @return string
 */
function writeRevision( $row ) {
	wfProfileIn( __METHOD__ );

	$indent = "      ";
	// rev_deleted may be missing from the row; treat that as "nothing hidden"
	// everywhere, including the sha1 check further down.
	$deleted = isset( $row->rev_deleted ) ? (int)$row->rev_deleted : 0;

	$out = "    <revision>\n";
	$out .= $indent . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";
	if ( isset( $row->rev_parent_id ) && $row->rev_parent_id ) {
		$out .= $indent . Xml::element( 'parentid', null, strval( $row->rev_parent_id ) ) . "\n";
	}

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( $deleted & Revision::DELETED_USER ) {
		$out .= $indent . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( isset( $row->rev_minor_edit ) && $row->rev_minor_edit ) {
		$out .= "      <minor/>\n";
	}
	if ( $deleted & Revision::DELETED_COMMENT ) {
		$out .= $indent . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
	} elseif ( $row->rev_comment != '' ) {
		$out .= $indent . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
	}

	$text = '';
	if ( $deleted & Revision::DELETED_TEXT ) {
		$out .= $indent . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$out .= $indent . Xml::elementClean( 'text',
			array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
			$text ) . "\n";
	} else {
		// Stub output
		$out .= $indent . Xml::element( 'text',
			array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
			"" ) . "\n";
	}

	if ( isset( $row->rev_sha1 ) && $row->rev_sha1 && !( $deleted & Revision::DELETED_TEXT ) ) {
		$out .= $indent . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= "      <sha1/>\n";
	}

	if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
		$content_model = strval( $row->rev_content_model );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		// @todo: test!
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		$content_model = ContentHandler::getDefaultModelFor( $title );
	}

	$out .= $indent . Xml::element( 'model', null, strval( $content_model ) ) . "\n";

	if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
		$content_format = strval( $row->rev_content_format );
	} else {
		// probably using $wgContentHandlerUseDB = false;
		// @todo: test!
		$content_handler = ContentHandler::getForModelID( $content_model );
		$content_format = $content_handler->getDefaultFormat();
	}

	$out .= $indent . Xml::element( 'format', null, strval( $content_format ) ) . "\n";

	// Newer PHP versions do not allow taking a reference to $this, so hook
	// handlers that declare the writer by reference get a local copy of the
	// object handle instead.
	$writer = $this;
	wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

	$out .= "    </revision>\n";

	wfProfileOut( __METHOD__ );
	return $out;
}
00711 
/**
 * Builds a <logitem> element for one log row.
 *
 * Suppressed user and comment fields are replaced by empty elements marked
 * deleted="deleted". A suppressed action yields an empty deleted <text>
 * element in place of the log title and parameters.
 *
 * @param object $row Row with log_* fields and user_name
 * @return string
 */
function writeLogItem( $row ) {
	wfProfileIn( __METHOD__ );

	$pad = "    ";
	$hidden = array( 'deleted' => 'deleted' );

	$xml = "  <logitem>\n";
	$xml .= $pad . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$xml .= $this->writeTimestamp( $row->log_timestamp, "    " );

	if ( $row->log_deleted & LogPage::DELETED_USER ) {
		$xml .= $pad . Xml::element( 'contributor', $hidden ) . "\n";
	} else {
		$xml .= $this->writeContributor( $row->log_user, $row->user_name, "    " );
	}

	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$xml .= $pad . Xml::element( 'comment', $hidden ) . "\n";
	} elseif ( $row->log_comment != '' ) {
		$xml .= $pad . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
	}

	$xml .= $pad . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$xml .= $pad . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$xml .= $pad . Xml::element( 'text', $hidden ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$xml .= $pad . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";
		$paramAttribs = array( 'xml:space' => 'preserve' );
		$xml .= $pad . Xml::elementClean( 'params', $paramAttribs, strval( $row->log_params ) ) . "\n";
	}

	$xml .= "  </logitem>\n";

	wfProfileOut( __METHOD__ );
	return $xml;
}
00758 
/**
 * Builds an indented <timestamp> line with the time in ISO 8601 format.
 *
 * @param mixed $timestamp Timestamp in any format wfTimestamp() accepts
 * @param string $indent Leading whitespace for the line
 * @return string
 */
function writeTimestamp( $timestamp, $indent = "      " ) {
	$iso = wfTimestamp( TS_ISO_8601, $timestamp );
	$element = Xml::element( 'timestamp', null, $iso );
	return $indent . $element . "\n";
}
00768 
/**
 * Builds a <contributor> element.
 *
 * A contributor with a falsy id whose name is a valid IP address is written
 * as <ip>; every other contributor is written as <username> plus <id>.
 *
 * @param int $id User id
 * @param string $text User name or IP address
 * @param string $indent Leading whitespace for each line
 * @return string
 */
function writeContributor( $id, $text, $indent = "      " ) {
	$inner = $indent . "  ";
	$name = strval( $text );

	// IP::isValid() is only consulted when there is no user id
	$isAnonymousIp = !$id && IP::isValid( $text );

	$xml = $indent . "<contributor>\n";
	if ( $isAnonymousIp ) {
		$xml .= $inner . Xml::elementClean( 'ip', null, $name ) . "\n";
	} else {
		$xml .= $inner . Xml::elementClean( 'username', null, $name ) . "\n";
		$xml .= $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}
	$xml .= $indent . "</contributor>\n";

	return $xml;
}
00786 
/**
 * Builds <upload> records for a file page: the file's history entries in
 * reversed order, followed by the file object itself.
 *
 * Returns '' for rows outside the file namespace and for files that do not exist.
 *
 * @param object $row Row with page_namespace and page_title
 * @param bool $dumpContents Passed through to writeUpload()
 * @return string
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_FILE ) {
		return '';
	}

	$img = wfLocalFile( $row->page_title );
	if ( !$img || !$img->exists() ) {
		return '';
	}

	$records = '';
	$history = array_reverse( $img->getHistory() );
	foreach ( $history as $oldVersion ) {
		$records .= $this->writeUpload( $oldVersion, $dumpContents );
	}
	$records .= $this->writeUpload( $img, $dumpContents );

	return $records;
}
00807 
00813         function writeUpload( $file, $dumpContents = false ) {
00814                 if ( $file->isOld() ) {
00815                         $archiveName = "      " .
00816                                 Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
00817                 } else {
00818                         $archiveName = '';
00819                 }
00820                 if ( $dumpContents ) {
00821                         # Dump file as base64
00822                         # Uses only XML-safe characters, so does not need escaping
00823                         $contents = '      <contents encoding="base64">' .
00824                                 chunk_split( base64_encode( file_get_contents( $file->getPath() ) ) ) .
00825                                 "      </contents>\n";
00826                 } else {
00827                         $contents = '';
00828                 }
00829                 if ( $file->isDeleted( File::DELETED_COMMENT ) ) {
00830                         $comment = Xml::element( 'comment', array( 'deleted' => 'deleted' ) );
00831                 } else {
00832                         $comment = Xml::elementClean( 'comment', null, $file->getDescription() );
00833                 }
00834                 return "    <upload>\n" .
00835                         $this->writeTimestamp( $file->getTimestamp() ) .
00836                         $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) .
00837                         "      " . $comment . "\n" .
00838                         "      " . Xml::element( 'filename', null, $file->getName() ) . "\n" .
00839                         $archiveName .
00840                         "      " . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n" .
00841                         "      " . Xml::element( 'size', null, $file->getSize() ) . "\n" .
00842                         "      " . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n" .
00843                         "      " . Xml::element( 'rel', null, $file->getRel() ) . "\n" .
00844                         $contents .
00845                         "    </upload>\n";
00846         }
00847 
00858         public static function canonicalTitle( Title $title ) {
00859                 if ( $title->getInterwiki() ) {
00860                         return $title->getPrefixedText();
00861                 }
00862 
00863                 global $wgContLang;
00864                 $prefix = str_replace( '_', ' ', $wgContLang->getNsText( $title->getNamespace() ) );
00865 
00866                 if ( $prefix !== '' ) {
00867                         $prefix .= ':';
00868                 }
00869 
00870                 return $prefix . $title->getText();
00871         }
00872 }
00873 
00874 
/**
 * Base class for dump output sinks. Every write*() hook funnels its text
 * through write(), which here prints to standard output; subclasses
 * redirect the output by overriding write().
 */
class DumpOutput {

        /**
         * @param $string string Text opening the dump stream
         */
        public function writeOpenStream( $string ) {
                $this->write( $string );
        }

        /**
         * @param $string string Text closing the dump stream
         */
        public function writeCloseStream( $string ) {
                $this->write( $string );
        }

        /**
         * @param $page object Page being opened (unused here)
         * @param $string string Text opening the page
         */
        public function writeOpenPage( $page, $string ) {
                $this->write( $string );
        }

        /**
         * @param $string string Text closing the page
         */
        public function writeClosePage( $string ) {
                $this->write( $string );
        }

        /**
         * @param $rev object Revision being written (unused here)
         * @param $string string Text of the revision
         */
        public function writeRevision( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * @param $rev object Log item being written (unused here)
         * @param $string string Text of the log item
         */
        public function writeLogItem( $rev, $string ) {
                $this->write( $string );
        }

        /**
         * Send a chunk of text to the output; this implementation prints it.
         *
         * @param $string string
         */
        public function write( $string ) {
                echo $string;
        }

        /**
         * No-op in this class: there is no file to close, rename or reopen.
         *
         * @param $newname mixed Ignored
         * @return null
         */
        public function closeRenameAndReopen( $newname ) {
                return null;
        }

        /**
         * No-op in this class: there is no file to close or rename.
         *
         * @param $newname mixed Ignored
         * @param $open bool Ignored
         * @return null
         */
        public function closeAndRename( $newname, $open = false ) {
                return null;
        }

        /**
         * This class writes to no file, so there is no name to report.
         *
         * @return null
         */
        public function getFilenames() {
                return null;
        }
}
00966 
/**
 * Dump sink that writes its output to a file opened with fopen().
 */
class DumpFileOutput extends DumpOutput {
        protected $handle = false, $filename;

        /**
         * @param $file string Path of the file to open for writing
         */
        function __construct( $file ) {
                $this->filename = $file;
                $this->handle = fopen( $file, "wt" );
        }

        /**
         * Write the closing text, then close the file if it is still open.
         *
         * @param $string string
         */
        function writeCloseStream( $string ) {
                parent::writeCloseStream( $string );
                if ( !$this->handle ) {
                        return;
                }
                fclose( $this->handle );
                $this->handle = false;
        }

        /**
         * @param $string string Text written to the file handle
         */
        function write( $string ) {
                fwrite( $this->handle, $string );
        }

        /**
         * Close the file, rename it, and open the original filename again.
         *
         * @param $newname string|array
         */
        function closeRenameAndReopen( $newname ) {
                $this->closeAndRename( $newname, true );
        }

        /**
         * Rename the output file, throwing if rename() reports failure.
         *
         * @param $newname string
         * @throws MWException
         */
        function renameOrException( $newname ) {
                $renamed = rename( $this->filename, $newname );
                if ( !$renamed ) {
                        throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
                }
        }

        /**
         * Reduce a rename argument to a single name.
         *
         * @param $newname string|array A name, or an array holding one name
         * @return string The name itself, or element 0 of the array
         * @throws MWException When the array holds more than one element
         */
        function checkRenameArgCount( $newname ) {
                if ( !is_array( $newname ) ) {
                        return $newname;
                }
                if ( count( $newname ) > 1 ) {
                        throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
                }
                return $newname[0];
        }

        /**
         * Close the file and rename it; optionally reopen the original
         * filename for further writing. Does nothing when the resolved
         * new name is empty.
         *
         * @param $newname string|array
         * @param $open bool Whether to reopen $this->filename afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $newname = $this->checkRenameArgCount( $newname );
                if ( !$newname ) {
                        return;
                }

                if ( $this->handle ) {
                        fclose( $this->handle );
                        $this->handle = false;
                }

                $this->renameOrException( $newname );

                if ( $open ) {
                        $this->handle = fopen( $this->filename, "wt" );
                }
        }

        /**
         * @return string|null The filename given to the constructor
         */
        function getFilenames() {
                return $this->filename;
        }
}
01058 
/**
 * Dump sink that streams its output into the standard input of an external
 * command started with proc_open(). When a file name is given, the command's
 * standard output is redirected to that file.
 */
class DumpPipeOutput extends DumpFileOutput {
        /**
         * $command holds the command WITHOUT the "> file" redirect; the redirect
         * is appended each time the process is (re)started.
         */
        protected $command, $filename;
        protected $procOpenResource = false;

        /**
         * @param $command string Shell command that reads the dump on stdin
         * @param $file string|null File receiving the command's stdout, if any
         */
        function __construct( $command, $file = null ) {
                // Remember the bare command so closeAndRename() can rebuild the
                // full command line without stacking a second redirect on it.
                $this->command = $command;
                $this->filename = $file;

                if ( !is_null( $file ) ) {
                        $command .=  " > " . wfEscapeShellArg( $file );
                }

                $this->startCommand( $command );
        }

        /**
         * Write the closing text, close the pipe, then reap the child process.
         *
         * @param $string string
         */
        function writeCloseStream( $string ) {
                parent::writeCloseStream( $string );
                if ( $this->procOpenResource ) {
                        proc_close( $this->procOpenResource );
                        $this->procOpenResource = false;
                }
        }

        /**
         * Launch the command and keep the write end of its stdin pipe as the
         * output handle.
         *
         * @param $command string Full shell command line
         */
        function startCommand( $command ) {
                $spec = array(
                        // Descriptor 0 (child's stdin) is a pipe the child reads from
                        0 => array( "pipe", "r" ),
                );
                $pipes = array();
                $this->procOpenResource = proc_open( $command, $spec, $pipes );
                $this->handle = $pipes[0];
        }

        /**
         * Close the pipe, rename the output file, and start the command again.
         *
         * @param $newname string|array
         */
        function closeRenameAndReopen( $newname ) {
                $this->closeAndRename( $newname, true );
        }

        /**
         * Close the pipe and the process, then rename the output file;
         * optionally restart the command writing to the original filename.
         * Does nothing when the resolved new name is empty.
         *
         * @param $newname string|array
         * @param $open bool Whether to restart the command afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $newname = $this->checkRenameArgCount( $newname );
                if ( $newname ) {
                        if ( $this->handle ) {
                                fclose( $this->handle );
                                $this->handle = false;
                        }
                        if ( $this->procOpenResource ) {
                                proc_close( $this->procOpenResource );
                                $this->procOpenResource = false;
                        }
                        $this->renameOrException( $newname );
                        if ( $open ) {
                                // $this->command is the bare command, so exactly one
                                // redirect ends up on the restarted command line.
                                $command = $this->command;
                                $command .=  " > " . wfEscapeShellArg( $this->filename );
                                $this->startCommand( $command );
                        }
                }
        }

}
01138 
/**
 * Dump sink that pipes its output through the "gzip" command into a file.
 */
class DumpGZipOutput extends DumpPipeOutput {

        /**
         * @param $file string File receiving the output of the gzip command
         */
        function __construct( $file ) {
                $compressor = "gzip";
                parent::__construct( $compressor, $file );
        }
}
01152 
/**
 * Dump sink that pipes its output through the "bzip2" command into a file.
 */
class DumpBZip2Output extends DumpPipeOutput {

        /**
         * @param $file string File receiving the output of the bzip2 command
         */
        function __construct( $file ) {
                $compressor = "bzip2";
                parent::__construct( $compressor, $file );
        }
}
01166 
/**
 * Dump sink that pipes its output into "7za", which adds the data read from
 * stdin to the given archive file.
 */
class Dump7ZipOutput extends DumpPipeOutput {

        /**
         * @param $file string Archive file passed to 7za
         */
        function __construct( $file ) {
                $command = $this->setup7zCommand( $file );
                // No redirect file is passed to the parent: 7za is given the
                // archive name on its own command line instead.
                parent::__construct( $command );
                $this->filename = $file;
        }

        /**
         * Build the 7za command line for the given archive file.
         *
         * @param $file string
         * @return string
         */
        function setup7zCommand( $file ) {
                $command = "7za a -bd -si " . wfEscapeShellArg( $file );
                // Suppress annoying useless crap from p7zip
                // Unfortunately this could suppress real error messages too
                $command .= ' >' . wfGetNull() . ' 2>&1';
                return $command;
        }

        /**
         * Close the pipe and the 7za process, then rename the archive;
         * optionally start a new 7za process writing to the original filename.
         * Does nothing when the resolved new name is empty.
         *
         * @param $newname string|array
         * @param $open bool Whether to restart 7za afterwards
         */
        function closeAndRename( $newname, $open = false ) {
                $newname = $this->checkRenameArgCount( $newname );
                if ( $newname ) {
                        // Guard and reset both resources, as the parent class does, so
                        // a later writeCloseStream() does not close them a second time.
                        if ( $this->handle ) {
                                fclose( $this->handle );
                                $this->handle = false;
                        }
                        if ( $this->procOpenResource ) {
                                proc_close( $this->procOpenResource );
                                $this->procOpenResource = false;
                        }
                        $this->renameOrException( $newname );
                        if ( $open ) {
                                $command = $this->setup7zCommand( $this->filename );
                                $this->startCommand( $command );
                        }
                }
        }
}
01211 
/**
 * Dump stream filter that sits in front of another sink. Each page is offered
 * to pass(); pages that are rejected, and their revisions, are not forwarded.
 * This base class accepts every page.
 */
class DumpFilter {

        /**
         * Sink (or further filter) that accepted output is forwarded to.
         */
        public $sink;

        /**
         * Result of pass() for the page currently open.
         */
        protected $sendingThisPage;

        /**
         * @param $sink object Sink that receives the filtered output
         */
        function __construct( &$sink ) {
                $this->sink =& $sink;
        }

        /**
         * @param $string string
         */
        function writeOpenStream( $string ) {
                $this->sink->writeOpenStream( $string );
        }

        /**
         * @param $string string
         */
        function writeCloseStream( $string ) {
                $this->sink->writeCloseStream( $string );
        }

        /**
         * Decide via pass() whether this page is forwarded, and forward its
         * opening text if so.
         *
         * @param $page object
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                $this->sendingThisPage = $this->pass( $page, $string );
                if ( $this->sendingThisPage ) {
                        $this->sink->writeOpenPage( $page, $string );
                }
        }

        /**
         * Forward the page's closing text if the page was accepted.
         *
         * @param $string string
         */
        function writeClosePage( $string ) {
                if ( $this->sendingThisPage ) {
                        $this->sink->writeClosePage( $string );
                        $this->sendingThisPage = false;
                }
        }

        /**
         * Forward a revision if its page was accepted.
         *
         * @param $rev object
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                if ( $this->sendingThisPage ) {
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * Forward a log item; log items are not subject to page filtering.
         *
         * @param $rev object
         * @param $string string
         */
        function writeLogItem( $rev, $string ) {
                if ( is_callable( array( $this->sink, 'writeLogItem' ) ) ) {
                        $this->sink->writeLogItem( $rev, $string );
                } else {
                        // Sinks without a writeLogItem() method keep receiving log
                        // items through writeRevision(), as they did before.
                        $this->sink->writeRevision( $rev, $string );
                }
        }

        /**
         * @param $newname string|array
         */
        function closeRenameAndReopen( $newname ) {
                $this->sink->closeRenameAndReopen( $newname );
        }

        /**
         * @param $newname string|array
         * @param $open bool
         */
        function closeAndRename( $newname, $open = false ) {
                $this->sink->closeAndRename( $newname, $open );
        }

        /**
         * @return mixed Whatever the sink reports as its filename(s)
         */
        function getFilenames() {
                return $this->sink->getFilenames();
        }

        /**
         * Decide whether a page is forwarded. Subclasses override this;
         * the base implementation accepts everything.
         *
         * @param $page object
         * @return bool
         */
        function pass( $page ) {
                return true;
        }
}
01323 
/**
 * Dump filter that drops pages whose namespace MWNamespace::isTalk()
 * reports as a talk namespace.
 */
class DumpNotalkFilter extends DumpFilter {

        /**
         * @param $page object Row providing page_namespace
         * @return bool True for pages outside talk namespaces
         */
        function pass( $page ) {
                $inTalkNamespace = MWNamespace::isTalk( $page->page_namespace );
                return !$inTalkNamespace;
        }
}
01338 
/**
 * Dump filter that includes or excludes pages by namespace.
 */
class DumpNamespaceFilter extends DumpFilter {
        // When true, listed namespaces are excluded instead of included
        var $invert = false;
        // Map of namespace ID => true for every listed namespace
        var $namespaces = array();

        /**
         * @param $sink object Sink that receives the filtered output
         * @param $param string Comma-separated list of namespace constant names
         *   (e.g. "NS_MAIN") and/or numeric namespace IDs; a leading "!"
         *   inverts the selection
         * @throws MWException On a key that is neither a known constant name
         *   nor numeric
         */
        function __construct( &$sink, $param ) {
                parent::__construct( $sink );

                $constants = array(
                        "NS_MAIN"           => NS_MAIN,
                        "NS_TALK"           => NS_TALK,
                        "NS_USER"           => NS_USER,
                        "NS_USER_TALK"      => NS_USER_TALK,
                        "NS_PROJECT"        => NS_PROJECT,
                        "NS_PROJECT_TALK"   => NS_PROJECT_TALK,
                        "NS_FILE"           => NS_FILE,
                        "NS_FILE_TALK"      => NS_FILE_TALK,
                        "NS_IMAGE"          => NS_IMAGE,  // NS_IMAGE is an alias for NS_FILE
                        "NS_IMAGE_TALK"     => NS_IMAGE_TALK,
                        "NS_MEDIAWIKI"      => NS_MEDIAWIKI,
                        "NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
                        "NS_TEMPLATE"       => NS_TEMPLATE,
                        "NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
                        "NS_HELP"           => NS_HELP,
                        "NS_HELP_TALK"      => NS_HELP_TALK,
                        "NS_CATEGORY"       => NS_CATEGORY,
                        "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

                // substr() rather than a string offset: the curly-brace offset
                // syntax is gone in PHP 8, and an offset on an empty string
                // raises a notice.
                if ( substr( $param, 0, 1 ) === '!' ) {
                        $this->invert = true;
                        $param = substr( $param, 1 );
                }

                foreach ( explode( ',', $param ) as $key ) {
                        $key = trim( $key );
                        if ( isset( $constants[$key] ) ) {
                                $ns = $constants[$key];
                        } elseif ( is_numeric( $key ) ) {
                                $ns = intval( $key );
                        } else {
                                throw new MWException( "Unrecognized namespace key '$key'\n" );
                        }
                        $this->namespaces[$ns] = true;
                }
        }

        /**
         * @param $page object Row providing page_namespace
         * @return bool True when the page's namespace is listed, or, with
         *   inversion enabled, when it is not listed
         */
        function pass( $page ) {
                $match = isset( $this->namespaces[$page->page_namespace] );
                return $this->invert xor $match;
        }
}
01403 
01404 
/**
 * Dump filter that forwards only the latest revision of each page. The page
 * header and the matching revision are held back until the page is closed.
 */
class DumpLatestFilter extends DumpFilter {
        var $page, $pageString, $rev, $revString;

        /**
         * Remember the page and its opening text; nothing is forwarded yet.
         *
         * @param $page object Row providing page_latest
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                $this->pageString = $string;
                $this->page = $page;
        }

        /**
         * Forward the buffered page header, the buffered revision and the
         * closing text, provided a revision was buffered; then clear the buffer.
         *
         * @param $string string
         */
        function writeClosePage( $string ) {
                if ( $this->rev ) {
                        $this->sink->writeOpenPage( $this->page, $this->pageString );
                        $this->sink->writeRevision( $this->rev, $this->revString );
                        $this->sink->writeClosePage( $string );
                }

                $this->page = null;
                $this->pageString = null;
                $this->rev = null;
                $this->revString = null;
        }

        /**
         * Buffer the revision when its rev_id equals the page's page_latest;
         * any other revision is ignored.
         *
         * @param $rev object Row providing rev_id
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                if ( $rev->rev_id != $this->page->page_latest ) {
                        return;
                }
                $this->revString = $string;
                $this->rev = $rev;
        }
}
01447 
/**
 * Dump sink that repeats every call on each of several underlying sinks.
 */
class DumpMultiWriter {

        /**
         * Sinks that every call is repeated on.
         */
        public $sinks;

        /**
         * Number of sinks, taken once at construction.
         */
        public $count;

        /**
         * @param $sinks array List of sink objects, indexed from 0
         */
        function __construct( $sinks ) {
                $this->sinks = $sinks;
                $this->count = count( $sinks );
        }

        /**
         * @param $string string
         */
        function writeOpenStream( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenStream( $string );
                }
        }

        /**
         * @param $string string
         */
        function writeCloseStream( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeCloseStream( $string );
                }
        }

        /**
         * @param $page object
         * @param $string string
         */
        function writeOpenPage( $page, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenPage( $page, $string );
                }
        }

        /**
         * @param $string string
         */
        function writeClosePage( $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeClosePage( $string );
                }
        }

        /**
         * @param $rev object
         * @param $string string
         */
        function writeRevision( $rev, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeRevision( $rev, $string );
                }
        }

        /**
         * Repeat a log item on every sink. The other sink classes in this file
         * offer this method, so it is needed here for log dumps that go to
         * several outputs.
         *
         * @param $rev object
         * @param $string string
         */
        function writeLogItem( $rev, $string ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeLogItem( $rev, $string );
                }
        }

        /**
         * @param $newnames array New names, one per sink, in sink order
         */
        function closeRenameAndReopen( $newnames ) {
                $this->closeAndRename( $newnames, true );
        }

        /**
         * Have sink number i close and rename its output to $newnames[i].
         *
         * @param $newnames array New names, one per sink, in sink order
         * @param $open bool Passed through to each sink
         */
        function closeAndRename( $newnames, $open = false ) {
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->closeAndRename( $newnames[$i], $open );
                }
        }

        /**
         * @return array What each sink's getFilenames() returned, in sink order
         */
        function getFilenames() {
                $filenames = array();
                for ( $i = 0; $i < $this->count; $i++ ) {
                        $filenames[] =  $this->sinks[$i]->getFilenames();
                }
                return $filenames;
        }

}
01538 
/**
 * Prepare a string for inclusion in XML output: the string is run through
 * UtfNormal::cleanUp() and the result is escaped with htmlspecialchars().
 *
 * @param $string string
 * @return string
 */
function xmlsafe( $string ) {
        wfProfileIn( __FUNCTION__ );

        // Normalize first, then escape the normalized text
        $normalized = UtfNormal::cleanUp( $string );
        $escaped = htmlspecialchars( $normalized );

        wfProfileOut( __FUNCTION__ );
        return $escaped;
}