MediaWiki  master
LanguageConverter.php
Go to the documentation of this file.
00001 <?php
00031 class LanguageConverter {
00032 
00038         static public $languagesWithVariants = array(
00039                 'gan',
00040                 'iu',
00041                 'kk',
00042                 'ku',
00043                 'shi',
00044                 'sr',
00045                 'tg',
00046                 'uz',
00047                 'zh',
00048         );
00049 
00050         public $mMainLanguageCode;
00051         public $mVariants, $mVariantFallbacks, $mVariantNames;
00052         public $mTablesLoaded = false;
00053         public $mTables;
00054         // 'bidirectional' 'unidirectional' 'disable' for each variant
00055         public $mManualLevel;
00056 
00060         public $mCacheKey;
00061 
00062         public $mLangObj;
00063         public $mFlags;
00064         public $mDescCodeSep = ':', $mDescVarSep = ';';
00065         public $mUcfirst = false;
00066         public $mConvRuleTitle = false;
00067         public $mURLVariant;
00068         public $mUserVariant;
00069         public $mHeaderVariant;
00070         public $mMaxDepth = 10;
00071         public $mVarSeparatorPattern;
00072 
00073         const CACHE_VERSION_KEY = 'VERSION 6';
00074 
00085         public function __construct( $langobj, $maincode, $variants = array(),
00086                                                                 $variantfallbacks = array(), $flags = array(),
00087                                                                 $manualLevel = array() ) {
00088                 global $wgDisabledVariants;
00089                 $this->mLangObj = $langobj;
00090                 $this->mMainLanguageCode = $maincode;
00091                 $this->mVariants = array_diff( $variants, $wgDisabledVariants );
00092                 $this->mVariantFallbacks = $variantfallbacks;
00093                 $this->mVariantNames = Language::fetchLanguageNames();
00094                 $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
00095                 $defaultflags = array(
00096                         // 'S' show converted text
00097                         // '+' add rules for alltext
00098                         // 'E' the gave flags is error
00099                         // these flags above are reserved for program
00100                         'A' => 'A',       // add rule for convert code (all text convert)
00101                         'T' => 'T',       // title convert
00102                         'R' => 'R',       // raw content
00103                         'D' => 'D',       // convert description (subclass implement)
00104                         '-' => '-',       // remove convert (not implement)
00105                         'H' => 'H',       // add rule for convert code
00106                                                   // (but no display in placed code)
00107                         'N' => 'N'        // current variant name
00108                 );
00109                 $this->mFlags = array_merge( $defaultflags, $flags );
00110                 foreach ( $this->mVariants as $v ) {
00111                         if ( array_key_exists( $v, $manualLevel ) ) {
00112                                 $this->mManualLevel[$v] = $manualLevel[$v];
00113                         } else {
00114                                 $this->mManualLevel[$v] = 'bidirectional';
00115                         }
00116                         $this->mFlags[$v] = $v;
00117                 }
00118         }
00119 
00126         public function getVariants() {
00127                 return $this->mVariants;
00128         }
00129 
00141         public function getVariantFallbacks( $variant ) {
00142                 if ( isset( $this->mVariantFallbacks[$variant] ) ) {
00143                         return $this->mVariantFallbacks[$variant];
00144                 }
00145                 return $this->mMainLanguageCode;
00146         }
00147 
00152         public function getConvRuleTitle() {
00153                 return $this->mConvRuleTitle;
00154         }
00155 
00160         public function getPreferredVariant() {
00161                 global $wgDefaultLanguageVariant, $wgUser;
00162 
00163                 $req = $this->getURLVariant();
00164 
00165                 if ( $wgUser->isLoggedIn() && !$req ) {
00166                         $req = $this->getUserVariant();
00167                 } elseif ( !$req ) {
00168                         $req = $this->getHeaderVariant();
00169                 }
00170 
00171                 if ( $wgDefaultLanguageVariant && !$req ) {
00172                         $req = $this->validateVariant( $wgDefaultLanguageVariant );
00173                 }
00174 
00175                 // This function, unlike the other get*Variant functions, is
00176                 // not memoized (i.e. there return value is not cached) since
00177                 // new information might appear during processing after this
00178                 // is first called.
00179                 if ( $this->validateVariant( $req ) ) {
00180                         return $req;
00181                 }
00182                 return $this->mMainLanguageCode;
00183         }
00184 
00190         public function getDefaultVariant() {
00191                 global $wgDefaultLanguageVariant;
00192 
00193                 $req = $this->getURLVariant();
00194 
00195                 if ( !$req ) {
00196                         $req = $this->getHeaderVariant();
00197                 }
00198 
00199                 if ( $wgDefaultLanguageVariant && !$req ) {
00200                         $req = $this->validateVariant( $wgDefaultLanguageVariant );
00201                 }
00202 
00203                 if ( $req ) {
00204                         return $req;
00205                 }
00206                 return $this->mMainLanguageCode;
00207         }
00208 
00214         public function validateVariant( $variant = null ) {
00215                 if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
00216                         return $variant;
00217                 }
00218                 return null;
00219         }
00220 
00226         public function getURLVariant() {
00227                 global $wgRequest;
00228 
00229                 if ( $this->mURLVariant ) {
00230                         return $this->mURLVariant;
00231                 }
00232 
00233                 // see if the preference is set in the request
00234                 $ret = $wgRequest->getText( 'variant' );
00235 
00236                 if ( !$ret ) {
00237                         $ret = $wgRequest->getVal( 'uselang' );
00238                 }
00239 
00240                 return $this->mURLVariant = $this->validateVariant( $ret );
00241         }
00242 
00248         protected function getUserVariant() {
00249                 global $wgUser;
00250 
00251                 // memoizing this function wreaks havoc on parserTest.php
00252                 /*
00253                 if ( $this->mUserVariant ) {
00254                         return $this->mUserVariant;
00255                 }
00256                 */
00257 
00258                 // Get language variant preference from logged in users
00259                 // Don't call this on stub objects because that causes infinite
00260                 // recursion during initialisation
00261                 if ( $wgUser->isLoggedIn() )  {
00262                         $ret = $wgUser->getOption( 'variant' );
00263                 } else {
00264                         // figure out user lang without constructing wgLang to avoid
00265                         // infinite recursion
00266                         $ret = $wgUser->getOption( 'language' );
00267                 }
00268 
00269                 return $this->mUserVariant = $this->validateVariant( $ret );
00270         }
00271 
00277         protected function getHeaderVariant() {
00278                 global $wgRequest;
00279 
00280                 if ( $this->mHeaderVariant ) {
00281                         return $this->mHeaderVariant;
00282                 }
00283 
00284                 // see if some supported language variant is set in the
00285                 // HTTP header.
00286                 $languages = array_keys( $wgRequest->getAcceptLang() );
00287                 if ( empty( $languages ) ) {
00288                         return null;
00289                 }
00290 
00291                 $fallbackLanguages = array();
00292                 foreach ( $languages as $language ) {
00293                         $this->mHeaderVariant = $this->validateVariant( $language );
00294                         if ( $this->mHeaderVariant ) {
00295                                 break;
00296                         }
00297 
00298                         // To see if there are fallbacks of current language.
00299                         // We record these fallback variants, and process
00300                         // them later.
00301                         $fallbacks = $this->getVariantFallbacks( $language );
00302                         if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
00303                                 $fallbackLanguages[] = $fallbacks;
00304                         } elseif ( is_array( $fallbacks ) ) {
00305                                 $fallbackLanguages =
00306                                         array_merge( $fallbackLanguages, $fallbacks );
00307                         }
00308                 }
00309 
00310                 if ( !$this->mHeaderVariant ) {
00311                         // process fallback languages now
00312                         $fallback_languages = array_unique( $fallbackLanguages );
00313                         foreach ( $fallback_languages as $language ) {
00314                                 $this->mHeaderVariant = $this->validateVariant( $language );
00315                                 if ( $this->mHeaderVariant ) {
00316                                         break;
00317                                 }
00318                         }
00319                 }
00320 
00321                 return $this->mHeaderVariant;
00322         }
00323 
00334         public function autoConvert( $text, $toVariant = false ) {
00335                 wfProfileIn( __METHOD__ );
00336 
00337                 $this->loadTables();
00338 
00339                 if ( !$toVariant ) {
00340                         $toVariant = $this->getPreferredVariant();
00341                         if ( !$toVariant ) {
00342                                 wfProfileOut( __METHOD__ );
00343                                 return $text;
00344                         }
00345                 }
00346 
00347                 if( $this->guessVariant( $text, $toVariant ) ) {
00348                         wfProfileOut( __METHOD__ );
00349                         return $text;
00350                 }
00351 
00352                 /* we convert everything except:
00353                    1. HTML markups (anything between < and >)
00354                    2. HTML entities
00355                    3. placeholders created by the parser
00356                 */
00357                 global $wgParser;
00358                 if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
00359                         $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
00360                 } else {
00361                         $marker = '';
00362                 }
00363 
00364                 // this one is needed when the text is inside an HTML markup
00365                 $htmlfix = '|<[^>]+$|^[^<>]*>';
00366 
00367                 // disable convert to variants between <code></code> tags
00368                 $codefix = '<code>.+?<\/code>|';
00369                 // disable convertsion of <script type="text/javascript"> ... </script>
00370                 $scriptfix = '<script.*?>.*?<\/script>|';
00371                 // disable conversion of <pre xxxx> ... </pre>
00372                 $prefix = '<pre.*?>.*?<\/pre>|';
00373 
00374                 $reg = '/' . $codefix . $scriptfix . $prefix .
00375                         '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
00376                 $startPos = 0;
00377                 $sourceBlob = '';
00378                 $literalBlob = '';
00379 
00380                 // Guard against delimiter nulls in the input
00381                 $text = str_replace( "\000", '', $text );
00382 
00383                 $markupMatches = null;
00384                 $elementMatches = null;
00385                 while ( $startPos < strlen( $text ) ) {
00386                         if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
00387                                 $elementPos = $markupMatches[0][1];
00388                                 $element = $markupMatches[0][0];
00389                         } else {
00390                                 $elementPos = strlen( $text );
00391                                 $element = '';
00392                         }
00393 
00394                         // Queue the part before the markup for translation in a batch
00395                         $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
00396 
00397                         // Advance to the next position
00398                         $startPos = $elementPos + strlen( $element );
00399 
00400                         // Translate any alt or title attributes inside the matched element
00401                         if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
00402                                 $elementMatches ) )
00403                         {
00404                                 $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
00405                                 $changed = false;
00406                                 foreach ( array( 'title', 'alt' ) as $attrName ) {
00407                                         if ( !isset( $attrs[$attrName] ) ) {
00408                                                 continue;
00409                                         }
00410                                         $attr = $attrs[$attrName];
00411                                         // Don't convert URLs
00412                                         if ( !strpos( $attr, '://' ) ) {
00413                                                 $attr = $this->convertTo( $attr, $toVariant );
00414                                         }
00415 
00416                                         // Remove HTML tags to avoid disrupting the layout
00417                                         $attr = preg_replace( '/<[^>]+>/', '', $attr );
00418                                         if ( $attr !== $attrs[$attrName] ) {
00419                                                 $attrs[$attrName] = $attr;
00420                                                 $changed = true;
00421                                         }
00422                                 }
00423                                 if ( $changed ) {
00424                                         $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
00425                                                 $elementMatches[3];
00426                                 }
00427                         }
00428                         $literalBlob .= $element . "\000";
00429                 }
00430 
00431                 // Do the main translation batch
00432                 $translatedBlob = $this->translate( $sourceBlob, $toVariant );
00433 
00434                 // Put the output back together
00435                 $translatedIter = StringUtils::explode( "\000", $translatedBlob );
00436                 $literalIter = StringUtils::explode( "\000", $literalBlob );
00437                 $output = '';
00438                 while ( $translatedIter->valid() && $literalIter->valid() ) {
00439                         $output .= $translatedIter->current();
00440                         $output .= $literalIter->current();
00441                         $translatedIter->next();
00442                         $literalIter->next();
00443                 }
00444 
00445                 wfProfileOut( __METHOD__ );
00446                 return $output;
00447         }
00448 
00458         public function translate( $text, $variant ) {
00459                 wfProfileIn( __METHOD__ );
00460                 // If $text is empty or only includes spaces, do nothing
00461                 // Otherwise translate it
00462                 if ( trim( $text ) ) {
00463                         $this->loadTables();
00464                         $text = $this->mTables[$variant]->replace( $text );
00465                 }
00466                 wfProfileOut( __METHOD__ );
00467                 return $text;
00468         }
00469 
00476         public function autoConvertToAllVariants( $text ) {
00477                 wfProfileIn( __METHOD__ );
00478                 $this->loadTables();
00479 
00480                 $ret = array();
00481                 foreach ( $this->mVariants as $variant ) {
00482                         $ret[$variant] = $this->translate( $text, $variant );
00483                 }
00484 
00485                 wfProfileOut( __METHOD__ );
00486                 return $ret;
00487         }
00488 
00500         public function convertLinkToAllVariants( $text ) {
00501                 return $this->autoConvertToAllVariants( $text );
00502         }
00503 
00509         protected function applyManualConv( $convRule ) {
00510                 // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
00511                 // title conversion.
00512                 // Bug 24072: $mConvRuleTitle was overwritten by other manual
00513                 // rule(s) not for title, this breaks the title conversion.
00514                 $newConvRuleTitle = $convRule->getTitle();
00515                 if ( $newConvRuleTitle ) {
00516                         // So I add an empty check for getTitle()
00517                         $this->mConvRuleTitle = $newConvRuleTitle;
00518                 }
00519 
00520                 // merge/remove manual conversion rules to/from global table
00521                 $convTable = $convRule->getConvTable();
00522                 $action = $convRule->getRulesAction();
00523                 foreach ( $convTable as $variant => $pair ) {
00524                         if ( !$this->validateVariant( $variant ) ) {
00525                                 continue;
00526                         }
00527 
00528                         if ( $action == 'add' ) {
00529                                 foreach ( $pair as $from => $to ) {
00530                                         // to ensure that $from and $to not be left blank
00531                                         // so $this->translate() could always return a string
00532                                         if ( $from || $to ) {
00533                                                 // more efficient than array_merge(), about 2.5 times.
00534                                                 $this->mTables[$variant]->setPair( $from, $to );
00535                                         }
00536                                 }
00537                         } elseif ( $action == 'remove' ) {
00538                                 $this->mTables[$variant]->removeArray( $pair );
00539                         }
00540                 }
00541         }
00542 
00550         public function convertTitle( $title ) {
00551                 $variant = $this->getPreferredVariant();
00552                 $index = $title->getNamespace();
00553                 if ( $index !== NS_MAIN ) {
00554                         $text = $this->convertNamespace( $index ) . ':';
00555                 } else {
00556                         $text = '';
00557                 }
00558                 $text .= $this->translate( $title->getText(), $variant );
00559                 return $text;
00560         }
00561 
00568         public function convertNamespace( $index ) {
00569                 $variant = $this->getPreferredVariant();
00570                 if ( $index === NS_MAIN ) {
00571                         return '';
00572                 } else {
00573                         // First check if a message gives a converted name in the target variant.
00574                         $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
00575                         if ( $nsConvMsg->exists() ) {
00576                                 return $nsConvMsg->plain();
00577                         }
00578                         // Then check if a message gives a converted name in content language
00579                         // which needs extra translation to the target variant.
00580                         $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
00581                         if ( $nsConvMsg->exists() ) {
00582                                 return $this->translate( $nsConvMsg->plain(), $variant );
00583                         }
00584                         // No message exists, retrieve it from the target variant's namespace names.
00585                         $langObj = $this->mLangObj->factory( $variant );
00586                         return $langObj->getFormattedNsText( $index );
00587                 }
00588         }
00589 
00604         public function convert( $text ) {
00605                 $variant = $this->getPreferredVariant();
00606                 return $this->convertTo( $text, $variant );
00607         }
00608 
00616         public function convertTo( $text, $variant ) {
00617                 global $wgDisableLangConversion;
00618                 if ( $wgDisableLangConversion ) {
00619                         return $text;
00620                 }
00621                 return $this->recursiveConvertTopLevel( $text, $variant );
00622         }
00623 
00633         protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
00634                 $startPos = 0;
00635                 $out = '';
00636                 $length = strlen( $text );
00637                 $shouldConvert = !$this->guessVariant( $text, $variant );
00638 
00639                 while ( $startPos < $length ) {
00640                         $pos = strpos( $text, '-{', $startPos );
00641 
00642                         if ( $pos === false ) {
00643                                 // No more markup, append final segment
00644                                 $fragment = substr( $text, $startPos );
00645                                 $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment;
00646                                 return $out;
00647                         }
00648 
00649                         // Markup found
00650                         // Append initial segment
00651                         $fragment = substr( $text, $startPos, $pos - $startPos );
00652                         $out .= $shouldConvert? $this->autoConvert( $fragment, $variant ): $fragment;
00653 
00654                         // Advance position
00655                         $startPos = $pos;
00656 
00657                         // Do recursive conversion
00658                         $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
00659                 }
00660 
00661                 return $out;
00662         }
00663 
00675         protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
00676                 // Quick sanity check (no function calls)
00677                 if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
00678                         throw new MWException( __METHOD__ . ': invalid input string' );
00679                 }
00680 
00681                 $startPos += 2;
00682                 $inner = '';
00683                 $warningDone = false;
00684                 $length = strlen( $text );
00685 
00686                 while ( $startPos < $length ) {
00687                         $m = false;
00688                         preg_match( '/-\{|\}-/', $text, $m,  PREG_OFFSET_CAPTURE, $startPos );
00689                         if ( !$m ) {
00690                                 // Unclosed rule
00691                                 break;
00692                         }
00693 
00694                         $token = $m[0][0];
00695                         $pos = $m[0][1];
00696 
00697                         // Markup found
00698                         // Append initial segment
00699                         $inner .= substr( $text, $startPos, $pos - $startPos );
00700 
00701                         // Advance position
00702                         $startPos = $pos;
00703 
00704                         switch ( $token ) {
00705                                 case '-{':
00706                                         // Check max depth
00707                                         if ( $depth >= $this->mMaxDepth ) {
00708                                                 $inner .= '-{';
00709                                                 if ( !$warningDone ) {
00710                                                         $inner .= '<span class="error">' .
00711                                                                 wfMessage( 'language-converter-depth-warning' )
00712                                                                         ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
00713                                                                 '</span>';
00714                                                         $warningDone = true;
00715                                                 }
00716                                                 $startPos += 2;
00717                                                 continue;
00718                                         }
00719                                         // Recursively parse another rule
00720                                         $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
00721                                         break;
00722                                 case '}-':
00723                                         // Apply the rule
00724                                         $startPos += 2;
00725                                         $rule = new ConverterRule( $inner, $this );
00726                                         $rule->parse( $variant );
00727                                         $this->applyManualConv( $rule );
00728                                         return $rule->getDisplay();
00729                                 default:
00730                                         throw new MWException( __METHOD__ . ': invalid regex match' );
00731                         }
00732                 }
00733 
00734                 // Unclosed rule
00735                 if ( $startPos < $length ) {
00736                         $inner .= substr( $text, $startPos );
00737                 }
00738                 $startPos = $length;
00739                 return '-{' . $this->autoConvert( $inner, $variant );
00740         }
00741 
00753         public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
00754                 # If the article has already existed, there is no need to
00755                 # check it again, otherwise it may cause a fault.
00756                 if ( is_object( $nt ) && $nt->exists() ) {
00757                         return;
00758                 }
00759 
00760                 global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
00761                         $wgUser;
00762                 $isredir = $wgRequest->getText( 'redirect', 'yes' );
00763                 $action = $wgRequest->getText( 'action' );
00764                 $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
00765                 $disableLinkConversion = $wgDisableLangConversion
00766                         || $wgDisableTitleConversion;
00767                 $linkBatch = new LinkBatch();
00768 
00769                 $ns = NS_MAIN;
00770 
00771                 if ( $disableLinkConversion ||
00772                          ( !$ignoreOtherCond &&
00773                            ( $isredir == 'no'
00774                                  || $action == 'edit'
00775                                  || $action == 'submit'
00776                                  || $linkconvert == 'no'
00777                                  || $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
00778                         return;
00779                 }
00780 
00781                 if ( is_object( $nt ) ) {
00782                         $ns = $nt->getNamespace();
00783                 }
00784 
00785                 $variants = $this->autoConvertToAllVariants( $link );
00786                 if ( !$variants ) { // give up
00787                         return;
00788                 }
00789 
00790                 $titles = array();
00791 
00792                 foreach ( $variants as $v ) {
00793                         if ( $v != $link ) {
00794                                 $varnt = Title::newFromText( $v, $ns );
00795                                 if ( !is_null( $varnt ) ) {
00796                                         $linkBatch->addObj( $varnt );
00797                                         $titles[] = $varnt;
00798                                 }
00799                         }
00800                 }
00801 
00802                 // fetch all variants in single query
00803                 $linkBatch->execute();
00804 
00805                 foreach ( $titles as $varnt ) {
00806                         if ( $varnt->getArticleID() > 0 ) {
00807                                 $nt = $varnt;
00808                                 $link = $varnt->getText();
00809                                 break;
00810                         }
00811                 }
00812         }
00813 
00819         public function getExtraHashOptions() {
00820                 $variant = $this->getPreferredVariant();
00821                 return '!' . $variant;
00822         }
00823 
00834         public function guessVariant($text, $variant) {
00835                 return false;
00836         }
00837 
00845         function loadDefaultTables() {
00846                 $name = get_class( $this );
00847                 throw new MWException( "Must implement loadDefaultTables() method in class $name" );
00848         }
00849 
00855         function loadTables( $fromCache = true ) {
00856                 global $wgLangConvMemc;
00857 
00858                 if ( $this->mTablesLoaded ) {
00859                         return;
00860                 }
00861 
00862                 wfProfileIn( __METHOD__ );
00863                 $this->mTablesLoaded = true;
00864                 $this->mTables = false;
00865                 if ( $fromCache ) {
00866                         wfProfileIn( __METHOD__ . '-cache' );
00867                         $this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
00868                         wfProfileOut( __METHOD__ . '-cache' );
00869                 }
00870                 if ( !$this->mTables
00871                          || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
00872                         wfProfileIn( __METHOD__ . '-recache' );
00873                         // not in cache, or we need a fresh reload.
00874                         // We will first load the default tables
00875                         // then update them using things in MediaWiki:Conversiontable/*
00876                         $this->loadDefaultTables();
00877                         foreach ( $this->mVariants as $var ) {
00878                                 $cached = $this->parseCachedTable( $var );
00879                                 $this->mTables[$var]->mergeArray( $cached );
00880                         }
00881 
00882                         $this->postLoadTables();
00883                         $this->mTables[self::CACHE_VERSION_KEY] = true;
00884 
00885                         $wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
00886                         wfProfileOut( __METHOD__ . '-recache' );
00887                 }
00888                 wfProfileOut( __METHOD__ );
00889         }
00890 
00894         function postLoadTables() { }
00895 
00901         function reloadTables() {
00902                 if ( $this->mTables ) {
00903                         unset( $this->mTables );
00904                 }
00905                 $this->mTablesLoaded = false;
00906                 $this->loadTables( false );
00907         }
00908 
00928         function parseCachedTable( $code, $subpage = '', $recursive = true ) {
00929                 static $parsed = array();
00930 
00931                 $key = 'Conversiontable/' . $code;
00932                 if ( $subpage ) {
00933                         $key .= '/' . $subpage;
00934                 }
00935                 if ( array_key_exists( $key, $parsed ) ) {
00936                         return array();
00937                 }
00938 
00939                 $parsed[$key] = true;
00940 
00941                 if ( $subpage === '' ) {
00942                         $txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
00943                 } else {
00944                         $txt = false;
00945                         $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
00946                         if ( $title && $title->exists() ) {
00947                                 $revision = Revision::newFromTitle( $title );
00948                                 if ( $revision ) {
00949                                         if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
00950                                                 $txt = $revision->getContent( Revision::RAW )->getNativeData();
00951                                         }
00952 
00953                                         //@todo: in the future, use a specialized content model, perhaps based on json!
00954                                 }
00955                         }
00956                 }
00957 
00958                 # Nothing to parse if there's no text
00959                 if ( $txt === false || $txt === null || $txt === '' ) {
00960                         return array();
00961                 }
00962 
00963                 // get all subpage links of the form
00964                 // [[MediaWiki:Conversiontable/zh-xx/...|...]]
00965                 $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
00966                         ':Conversiontable';
00967                 $subs = StringUtils::explode( '[[', $txt );
00968                 $sublinks = array();
00969                 foreach ( $subs as $sub ) {
00970                         $link = explode( ']]', $sub, 2 );
00971                         if ( count( $link ) != 2 ) {
00972                                 continue;
00973                         }
00974                         $b = explode( '|', $link[0], 2 );
00975                         $b = explode( '/', trim( $b[0] ), 3 );
00976                         if ( count( $b ) == 3 ) {
00977                                 $sublink = $b[2];
00978                         } else {
00979                                 $sublink = '';
00980                         }
00981 
00982                         if ( $b[0] == $linkhead && $b[1] == $code ) {
00983                                 $sublinks[] = $sublink;
00984                         }
00985                 }
00986 
00987                 // parse the mappings in this page
00988                 $blocks = StringUtils::explode( '-{', $txt );
00989                 $ret = array();
00990                 $first = true;
00991                 foreach ( $blocks as $block ) {
00992                         if ( $first ) {
00993                                 // Skip the part before the first -{
00994                                 $first = false;
00995                                 continue;
00996                         }
00997                         $mappings = explode( '}-', $block, 2 );
00998                         $stripped = str_replace( array( "'", '"', '*', '#' ), '',
00999                                                                          $mappings[0] );
01000                         $table = StringUtils::explode( ';', $stripped );
01001                         foreach ( $table as $t ) {
01002                                 $m = explode( '=>', $t, 3 );
01003                                 if ( count( $m ) != 2 ) {
01004                                         continue;
01005                                 }
01006                                 // trim any trailling comments starting with '//'
01007                                 $tt = explode( '//', $m[1], 2 );
01008                                 $ret[trim( $m[0] )] = trim( $tt[0] );
01009                         }
01010                 }
01011 
01012                 // recursively parse the subpages
01013                 if ( $recursive ) {
01014                         foreach ( $sublinks as $link ) {
01015                                 $s = $this->parseCachedTable( $code, $link, $recursive );
01016                                 $ret = array_merge( $ret, $s );
01017                         }
01018                 }
01019 
01020                 if ( $this->mUcfirst ) {
01021                         foreach ( $ret as $k => $v ) {
01022                                 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
01023                         }
01024                 }
01025                 return $ret;
01026         }
01027 
01036         public function markNoConversion( $text, $noParse = false ) {
01037                 # don't mark if already marked
01038                 if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) {
01039                         return $text;
01040                 }
01041 
01042                 $ret = "-{R|$text}-";
01043                 return $ret;
01044         }
01045 
01054         function convertCategoryKey( $key ) {
01055                 return $key;
01056         }
01057 
01074         function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
01075                         $isWatch, $section, $flags, $revision ) {
01076                 $titleobj = $page->getTitle();
01077                 if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
01078                         $title = $titleobj->getDBkey();
01079                         $t = explode( '/', $title, 3 );
01080                         $c = count( $t );
01081                         if ( $c > 1 && $t[0] == 'Conversiontable' ) {
01082                                 if ( $this->validateVariant( $t[1] ) ) {
01083                                         $this->reloadTables();
01084                                 }
01085                         }
01086                 }
01087                 return true;
01088         }
01089 
01098         public function armourMath( $text ) {
01099                 // convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
01100                 // any unwanted markup appearing in the math image tag.
01101                 $text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
01102                 return $text;
01103         }
01104 
01108         function getVarSeparatorPattern() {
01109                 if ( is_null( $this->mVarSeparatorPattern ) ) {
01110                         // varsep_pattern for preg_split:
01111                         // text should be splited by ";" only if a valid variant
01112                         // name exist after the markup, for example:
01113                         //  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
01114                         //      <span style="font-size:120%;">yyy</span>;}-
01115                         // we should split it as:
01116                         //  array(
01117                         //        [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
01118                         //        [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
01119                         //        [2] => ''
01120                         //       )
01121                         $pat = '/;\s*(?=';
01122                         foreach ( $this->mVariants as $variant ) {
01123                                 // zh-hans:xxx;zh-hant:yyy
01124                                 $pat .= $variant . '\s*:|';
01125                                 // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
01126                                 $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
01127                         }
01128                         $pat .= '\s*$)/';
01129                         $this->mVarSeparatorPattern = $pat;
01130                 }
01131                 return $this->mVarSeparatorPattern;
01132         }
01133 }
01134 
/**
 * Parser for a single manual conversion rule, i.e. the text found between
 * "-{" and "}-".
 *
 * Typical use: construct with the inner text and the owning converter, call
 * parse(), then read the results through the get*() accessors.
 */
class ConverterRule {
        public $mText; // original text in -{text}-
        public $mConverter; // LanguageConverter object
        public $mRuleDisplay = '';
        public $mRuleTitle = false;
        public $mRules = '';// string : the text of the rules
        public $mRulesAction = 'none';
        public $mFlags = array();
        public $mVariantFlags = array();
        public $mConvTable = array();
        public $mBidtable = array();// array of the translation in each variant
        public $mUnidtable = array();// array of the translation in each variant

        /**
         * @param string $text The text between -{ and }-
         * @param LanguageConverter $converter The converter this rule belongs to
         */
        public function __construct( $text, $converter ) {
                $this->mText = $text;
                $this->mConverter = $converter;
        }

        /**
         * Look up the bidirectional text for the first matching variant.
         *
         * @param string|array $variants One variant code, or a list tried in order
         * @return string|bool The text of the first variant present in
         *   $this->mBidtable, or false when none is (or the list is empty)
         */
        public function getTextInBidtable( $variants ) {
                $variants = (array)$variants;
                if ( !$variants ) {
                        return false;
                }
                foreach ( $variants as $variant ) {
                        if ( isset( $this->mBidtable[$variant] ) ) {
                                return $this->mBidtable[$variant];
                        }
                }
                return false;
        }

        /**
         * Split $this->mText into flags and rule text.
         *
         * Everything before the first '|' is a ';'-separated flag list, mapped
         * through the converter's mFlags table; unknown flags are dropped. The
         * flag set is then normalised and the results are stored in
         * $this->mFlags, $this->mVariantFlags and $this->mRules.
         */
        function parseFlags() {
                $text = $this->mText;
                $flags = array();
                $variantFlags = array();

                $sepPos = strpos( $text, '|' );
                if ( $sepPos !== false ) {
                        $validFlags = $this->mConverter->mFlags;
                        $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
                        foreach ( $f as $ff ) {
                                $ff = trim( $ff );
                                if ( isset( $validFlags[$ff] ) ) {
                                        $flags[$validFlags[$ff]] = true;
                                }
                        }
                        // strval() turns a false from substr() (nothing after '|') into ''.
                        $text = strval( substr( $text, $sepPos + 1 ) );
                }

                if ( !$flags ) {
                        // No (valid) flag at all: treated as 'S'.
                        $flags['S'] = true;
                } elseif ( isset( $flags['R'] ) ) {
                        $flags = array( 'R' => true );// remove other flags
                } elseif ( isset( $flags['N'] ) ) {
                        $flags = array( 'N' => true );// remove other flags
                } elseif ( isset( $flags['-'] ) ) {
                        $flags = array( '-' => true );// remove other flags
                } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
                        // A lone T implies H.
                        $flags['H'] = true;
                } elseif ( isset( $flags['H'] ) ) {
                        // replace A flag, and remove other flags except T and D
                        $temp = array( '+' => true, 'H' => true );
                        if ( isset( $flags['T'] ) ) {
                                $temp['T'] = true;
                        }
                        if ( isset( $flags['D'] ) ) {
                                $temp['D'] = true;
                        }
                        $flags = $temp;
                } else {
                        if ( isset( $flags['A'] ) ) {
                                $flags['+'] = true;
                                $flags['S'] = true;
                        }
                        if ( isset( $flags['D'] ) ) {
                                unset( $flags['S'] );
                        }
                        // try to find flags like "zh-hans", "zh-hant"
                        // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
                        $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
                        if ( $variantFlags ) {
                                $variantFlags = array_flip( $variantFlags );
                                $flags = array();
                        }
                }
                $this->mVariantFlags = $variantFlags;
                $this->mRules = $text;
                $this->mFlags = $flags;
        }

        /**
         * Parse $this->mRules into $this->mBidtable and $this->mUnidtable.
         *
         * Accepted forms, separated by ';':
         *   variant:text        -> bidirectional entry
         *   from=>variant:text  -> unidirectional entry
         * An entry is only recorded for a known variant, with a non-empty
         * target text and (for unidirectional entries) a non-empty source
         * text. If a part names something that is not in the converter's
         * mVariantNames, both tables are emptied and parsing stops.
         */
        function parseRules() {
                $rules = $this->mRules;
                $bidtable = array();
                $unidtable = array();
                $variants = $this->mConverter->mVariants;
                $varsep_pattern = $this->mConverter->getVarSeparatorPattern();

                $choice = preg_split( $varsep_pattern, $rules );

                foreach ( $choice as $c ) {
                        $v = explode( ':', $c, 2 );
                        if ( count( $v ) != 2 ) {
                                // syntax error, skip
                                continue;
                        }
                        $to = trim( $v[1] );
                        $v = trim( $v[0] );
                        $u = explode( '=>', $v, 2 );
                        // If $to is empty, strtr() could return a wrong result.
                        // Compare against '' explicitly: a plain truthiness test would
                        // also throw away the legitimate text "0".
                        if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants ) ) {
                                $bidtable[$v] = $to;
                        } elseif ( count( $u ) == 2 ) {
                                $from = trim( $u[0] );
                                $v = trim( $u[1] );
                                // $from becomes a key of the conversion table, so it must
                                // not be empty either.
                                if ( $to !== '' && $from !== '' && in_array( $v, $variants ) ) {
                                        $unidtable[$v][$from] = $to;
                                }
                        }
                        // syntax error, pass
                        if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
                                $bidtable = array();
                                $unidtable = array();
                                break;
                        }
                }
                $this->mBidtable = $bidtable;
                $this->mUnidtable = $unidtable;
        }

        /**
         * Build a human-readable description of the parsed rules, using the
         * converter's variant names and its code/variant separators.
         *
         * @return string
         */
        function getRulesDesc() {
                $codesep = $this->mConverter->mDescCodeSep;
                $varsep = $this->mConverter->mDescVarSep;
                $text = '';
                foreach ( $this->mBidtable as $k => $v ) {
                        $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
                }
                foreach ( $this->mUnidtable as $k => $a ) {
                        foreach ( $a as $from => $to ) {
                                $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
                                        "$codesep$to$varsep";
                        }
                }
                return $text;
        }

        /**
         * Pick the text of this rule to show for a variant.
         *
         * Without any parsed rule the raw rule text is returned. Otherwise the
         * candidates are tried in this order: the variant's bidirectional
         * entry, the bidirectional entry of one of its fallbacks, the first
         * unidirectional target for the variant, and -- only when manual
         * conversion is 'disable' for the variant -- the first entry of
         * whichever table is non-empty.
         *
         * @param string $variant Variant code
         * @return string|bool The chosen text; false when no candidate applies
         */
        function getRuleConvertedStr( $variant ) {
                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;

                if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
                        return $this->mRules;
                }

                // display current variant in bidirectional array
                $disp = $this->getTextInBidtable( $variant );
                // or display current variant in fallbacks
                if ( !$disp ) {
                        $disp = $this->getTextInBidtable(
                                $this->mConverter->getVariantFallbacks( $variant ) );
                }
                // or display current variant in unidirectional array
                if ( !$disp && array_key_exists( $variant, $unidtable ) ) {
                        $disp = array_values( $unidtable[$variant] );
                        $disp = $disp[0];
                }
                // or display first text under disable manual convert
                $manLevel = $this->mConverter->mManualLevel;
                if ( !$disp
                        && isset( $manLevel[$variant] )
                        && $manLevel[$variant] == 'disable'
                ) {
                        if ( count( $bidtable ) > 0 ) {
                                $disp = array_values( $bidtable );
                                $disp = $disp[0];
                        } else {
                                $disp = array_values( $unidtable );
                                $disp = array_values( $disp[0] );
                                $disp = $disp[0];
                        }
                }
                return $disp;
        }

        /**
         * Turn the parsed rules into per-variant conversion tables
         * ($this->mConvTable[variant][from] = to).
         *
         * Missing bidirectional entries are filled from the variant's
         * fallbacks. Bidirectional pairs are only added for variants whose
         * manual level is 'bidirectional'; unidirectional entries also for
         * 'unidirectional'.
         */
        function generateConvTable() {
                // Special case optimisation
                if ( !$this->mBidtable && !$this->mUnidtable ) {
                        $this->mConvTable = array();
                        return;
                }

                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;
                $manLevel = $this->mConverter->mManualLevel;

                $vmarked = array();
                foreach ( $this->mConverter->mVariants as $v ) {
                        // A variant without a configured level takes part in nothing.
                        $level = isset( $manLevel[$v] ) ? $manLevel[$v] : null;

                        /* for bidirectional array
                                fill in the missing variants, if any,
                                with fallbacks */
                        if ( !isset( $bidtable[$v] ) ) {
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $v );
                                $vf = $this->getTextInBidtable( $variantFallbacks );
                                if ( $vf ) {
                                        $bidtable[$v] = $vf;
                                }
                        }

                        if ( isset( $bidtable[$v] ) ) {
                                foreach ( $vmarked as $vo ) {
                                        // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                                        // or -{H|zh:WordZh;zh-tw:WordTw}-
                                        // or -{-|zh:WordZh;zh-tw:WordTw}-
                                        // to introduce a custom mapping between
                                        // words WordZh and WordTw in the whole text
                                        if ( $level == 'bidirectional' ) {
                                                $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
                                        }
                                        if ( isset( $manLevel[$vo] ) && $manLevel[$vo] == 'bidirectional' ) {
                                                $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
                                        }
                                }
                                $vmarked[] = $v;
                        }
                        /* for unidirectional array fill to convert tables */
                        if ( ( $level == 'bidirectional' || $level == 'unidirectional' )
                                && isset( $unidtable[$v] )
                        ) {
                                if ( isset( $this->mConvTable[$v] ) ) {
                                        $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
                                } else {
                                        $this->mConvTable[$v] = $unidtable[$v];
                                }
                        }
                }
        }

        /**
         * Parse the rule: flags, rules, display text, title text, action and
         * conversion table.
         *
         * @param string|null $variant Variant to render for; the converter's
         *   preferred variant is used when empty
         */
        public function parse( $variant = null ) {
                if ( !$variant ) {
                        $variant = $this->mConverter->getPreferredVariant();
                }

                $this->parseFlags();
                $flags = $this->mFlags;

                // convert to specified variant
                // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
                if ( $this->mVariantFlags ) {
                        // check if current variant in flags
                        if ( isset( $this->mVariantFlags[$variant] ) ) {
                                // then convert <text to convert> to current language
                                $this->mRules = $this->mConverter->autoConvert( $this->mRules,
                                        $variant );
                        } else {
                                // if current variant is not in flags,
                                // then we check its fallback variants.
                                $variantFallbacks =
                                        $this->mConverter->getVariantFallbacks( $variant );
                                if ( is_array( $variantFallbacks ) ) {
                                        foreach ( $variantFallbacks as $variantFallback ) {
                                                // if current variant's fallback exist in flags
                                                if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
                                                        // then convert <text to convert> to fallback language
                                                        $this->mRules =
                                                                $this->mConverter->autoConvert( $this->mRules,
                                                                        $variantFallback );
                                                        break;
                                                }
                                        }
                                }
                        }
                        $this->mFlags = $flags = array( 'R' => true );
                }

                if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
                        // decode => HTML entities modified by Sanitizer::removeHTMLtags
                        $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
                        $this->parseRules();
                }
                $rules = $this->mRules;

                if ( !$this->mBidtable && !$this->mUnidtable ) {
                        if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
                                // fill all variants if text in -{A/H/-|text} without rules
                                foreach ( $this->mConverter->mVariants as $v ) {
                                        $this->mBidtable[$v] = $rules;
                                }
                        } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
                                $this->mFlags = $flags = array( 'R' => true );
                        }
                }

                // false = "no flag produced a display text"; reported as an error below.
                $this->mRuleDisplay = false;
                foreach ( $flags as $flag => $unused ) {
                        switch ( $flag ) {
                                case 'R':
                                        // if we don't do content convert, still strip the -{}- tags
                                        $this->mRuleDisplay = $rules;
                                        break;
                                case 'N':
                                        // process N flag: output current variant name
                                        $ruleVar = trim( $rules );
                                        if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
                                                $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
                                        } else {
                                                $this->mRuleDisplay = '';
                                        }
                                        break;
                                case 'D':
                                        // process D flag: output rules description
                                        $this->mRuleDisplay = $this->getRulesDesc();
                                        break;
                                case 'H':
                                        // process H,- flag or T only: output nothing
                                        $this->mRuleDisplay = '';
                                        break;
                                case '-':
                                        $this->mRulesAction = 'remove';
                                        $this->mRuleDisplay = '';
                                        break;
                                case '+':
                                        $this->mRulesAction = 'add';
                                        $this->mRuleDisplay = '';
                                        break;
                                case 'S':
                                        $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
                                        break;
                                case 'T':
                                        $this->mRuleTitle = $this->getRuleConvertedStr( $variant );
                                        $this->mRuleDisplay = '';
                                        break;
                                default:
                                        // ignore unknown flags (but see error case below)
                        }
                }
                if ( $this->mRuleDisplay === false ) {
                        $this->mRuleDisplay = '<span class="error">'
                                . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
                                . '</span>';
                }

                $this->generateConvTable();
        }

        /**
         * Not implemented: the body is empty, so the call evaluates to null.
         *
         * @todo Implement, or remove if nothing calls it.
         */
        public function hasRules() {
                // TODO:
        }

        /**
         * @return string The text to display for this rule, as set by parse()
         */
        public function getDisplay() {
                return $this->mRuleDisplay;
        }

        /**
         * @return string|bool The title text set by the T flag, false if none
         */
        public function getTitle() {
                return $this->mRuleTitle;
        }

        /**
         * @return string 'add', 'remove' or 'none'
         */
        public function getRulesAction() {
                return $this->mRulesAction;
        }

        /**
         * @return array Conversion table built by generateConvTable(),
         *   indexed by variant code
         */
        public function getConvTable() {
                return $this->mConvTable;
        }

        /**
         * @return string The rule text (the part after the flags)
         */
        public function getRules() {
                return $this->mRules;
        }

        /**
         * @return array The normalised flags, flag => true
         */
        public function getFlags() {
                return $this->mFlags;
        }
}