MediaWiki
master
|
<?php
/**
 * Language variant conversion support.
 *
 * Defines LanguageConverter, which converts text between the variants of a
 * language using per-variant conversion tables, and ConverterRule, which
 * parses a single manual conversion rule written as -{flags|rules}-.
 *
 * @file
 */

/**
 * Base class for language variant conversion.
 *
 * Subclasses must implement loadDefaultTables() and may override
 * postLoadTables(), guessVariant() and convertCategoryKey().
 */
class LanguageConverter {

	/**
	 * Language codes listed as having variants.
	 *
	 * @var array
	 */
	public static $languagesWithVariants = array(
		'gan',
		'iu',
		'kk',
		'ku',
		'shi',
		'sr',
		'tg',
		'uz',
		'zh',
	);

	public $mMainLanguageCode;
	public $mVariants, $mVariantFallbacks, $mVariantNames;
	public $mTablesLoaded = false;
	public $mTables;
	// 'bidirectional' 'unidirectional' 'disable' for each variant
	public $mManualLevel;

	/**
	 * Key under which the conversion tables are stored in $wgLangConvMemc.
	 *
	 * @var string
	 */
	public $mCacheKey;

	public $mLangObj;
	public $mFlags;
	public $mDescCodeSep = ':', $mDescVarSep = ';';
	public $mUcfirst = false;
	public $mConvRuleTitle = false;
	public $mURLVariant;
	public $mUserVariant;
	public $mHeaderVariant;
	public $mMaxDepth = 10;
	public $mVarSeparatorPattern;

	/**
	 * Array key marking a cached table set as belonging to the current
	 * cache format; loadTables() rebuilds the tables when it is missing.
	 */
	const CACHE_VERSION_KEY = 'VERSION 6';

	/**
	 * Constructor
	 *
	 * @param $langobj Language object owning this converter
	 * @param $maincode String: the main language code of this language
	 * @param $variants Array: the supported variants of this language;
	 *   entries listed in $wgDisabledVariants are dropped
	 * @param $variantfallbacks Array: map of variant code to its fallback
	 *   (a string or an array of strings)
	 * @param $flags Array: extra rule flags, merged over the default flags
	 * @param $manualLevel Array: per-variant manual conversion level
	 *   ('bidirectional', 'unidirectional' or 'disable'); variants not
	 *   listed default to 'bidirectional'
	 */
	public function __construct( $langobj, $maincode, $variants = array(),
		$variantfallbacks = array(), $flags = array(),
		$manualLevel = array()
	) {
		global $wgDisabledVariants;
		$this->mLangObj = $langobj;
		$this->mMainLanguageCode = $maincode;
		$this->mVariants = array_diff( $variants, $wgDisabledVariants );
		$this->mVariantFallbacks = $variantfallbacks;
		$this->mVariantNames = Language::fetchLanguageNames();
		$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
		$defaultflags = array(
			// 'S' show converted text
			// '+' add rules for all text
			// 'E' the given flags are erroneous
			// these flags above are reserved for program
			'A' => 'A', // add rule for convert code (all text convert)
			'T' => 'T', // title convert
			'R' => 'R', // raw content
			'D' => 'D', // convert description (subclass implement)
			'-' => '-', // remove convert (not implement)
			'H' => 'H', // add rule for convert code
						// (but no display in placed code)
			'N' => 'N'  // current variant name
		);
		$this->mFlags = array_merge( $defaultflags, $flags );
		foreach ( $this->mVariants as $v ) {
			if ( array_key_exists( $v, $manualLevel ) ) {
				$this->mManualLevel[$v] = $manualLevel[$v];
			} else {
				$this->mManualLevel[$v] = 'bidirectional';
			}
			// Every variant code is itself a valid flag
			$this->mFlags[$v] = $v;
		}
	}

	/**
	 * Get all enabled variants of this language.
	 *
	 * @return Array of variant codes
	 */
	public function getVariants() {
		return $this->mVariants;
	}

	/**
	 * Get the fallback(s) configured for a variant.
	 *
	 * @param $variant String: the language code of the variant
	 * @return String|Array the configured fallback entry (string or array),
	 *   or the main language code when none is configured
	 */
	public function getVariantFallbacks( $variant ) {
		if ( isset( $this->mVariantFallbacks[$variant] ) ) {
			return $this->mVariantFallbacks[$variant];
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the title produced by the conversion rule.
	 *
	 * @return String|bool the converted title text, or false if no rule
	 *   has set one
	 */
	public function getConvRuleTitle() {
		return $this->mConvRuleTitle;
	}

	/**
	 * Get the preferred language variant.
	 *
	 * Checks, in order: the URL, then the user preference (logged-in users)
	 * or the Accept-Language header (everyone else), then
	 * $wgDefaultLanguageVariant.
	 *
	 * @return String the preferred variant, or the main language code when
	 *   no valid variant was found
	 */
	public function getPreferredVariant() {
		global $wgDefaultLanguageVariant, $wgUser;

		$req = $this->getURLVariant();

		if ( $wgUser->isLoggedIn() && !$req ) {
			$req = $this->getUserVariant();
		} elseif ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		// This function, unlike the other get*Variant functions, is
		// not memoized (i.e. its return value is not cached) since
		// new information might appear during processing after this
		// is first called.
		if ( $this->validateVariant( $req ) ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the default variant, ignoring the user preference.
	 *
	 * Checks the URL, then the Accept-Language header, then
	 * $wgDefaultLanguageVariant.
	 *
	 * @return String the default variant, or the main language code when
	 *   none was found
	 */
	public function getDefaultVariant() {
		global $wgDefaultLanguageVariant;

		$req = $this->getURLVariant();

		if ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		if ( $req ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Validate a variant code against the enabled variants.
	 *
	 * @param $variant String|null: the variant to validate
	 * @return String|null the variant if it is enabled, otherwise null
	 */
	public function validateVariant( $variant = null ) {
		if ( $variant !== null && in_array( $variant, $this->mVariants ) ) {
			return $variant;
		}
		return null;
	}

	/**
	 * Get the variant requested in the URL ('variant', falling back to
	 * 'uselang'). The result is memoized in $mURLVariant.
	 *
	 * @return String|null the validated variant, or null
	 */
	public function getURLVariant() {
		global $wgRequest;

		if ( $this->mURLVariant ) {
			return $this->mURLVariant;
		}

		// see if the preference is set in the request
		$ret = $wgRequest->getText( 'variant' );

		if ( !$ret ) {
			$ret = $wgRequest->getVal( 'uselang' );
		}

		return $this->mURLVariant = $this->validateVariant( $ret );
	}

	/**
	 * Determine the variant from the user's preferences.
	 *
	 * @return String|null the validated variant, or null
	 */
	protected function getUserVariant() {
		global $wgUser;

		// memoizing this function wreaks havoc on parserTest.php
		/*
		if ( $this->mUserVariant ) {
			return $this->mUserVariant;
		}
		*/

		// Get language variant preference from logged in users
		// Don't call this on stub objects because that causes infinite
		// recursion during initialisation
		if ( $wgUser->isLoggedIn() ) {
			$ret = $wgUser->getOption( 'variant' );
		} else {
			// figure out user lang without constructing wgLang to avoid
			// infinite recursion
			$ret = $wgUser->getOption( 'language' );
		}

		return $this->mUserVariant = $this->validateVariant( $ret );
	}

	/**
	 * Determine the variant from the request's accepted languages.
	 * The result is memoized in $mHeaderVariant.
	 *
	 * Each accepted language is tried directly first; if none is an enabled
	 * variant, the fallbacks of those languages are tried.
	 *
	 * @return String|null the validated variant, or null
	 */
	protected function getHeaderVariant() {
		global $wgRequest;

		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// see if some supported language variant is set in the
		// HTTP header.
		$languages = array_keys( $wgRequest->getAcceptLang() );
		if ( empty( $languages ) ) {
			return null;
		}

		$fallbackLanguages = array();
		foreach ( $languages as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}

			// To see if there are fallbacks of current language.
			// We record these fallback variants, and process
			// them later.
			$fallbacks = $this->getVariantFallbacks( $language );
			if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
				$fallbackLanguages[] = $fallbacks;
			} elseif ( is_array( $fallbacks ) ) {
				$fallbackLanguages =
					array_merge( $fallbackLanguages, $fallbacks );
			}
		}

		if ( !$this->mHeaderVariant ) {
			// process fallback languages now
			$fallbackLanguages = array_unique( $fallbackLanguages );
			foreach ( $fallbackLanguages as $language ) {
				$this->mHeaderVariant = $this->validateVariant( $language );
				if ( $this->mHeaderVariant ) {
					break;
				}
			}
		}

		return $this->mHeaderVariant;
	}

	/**
	 * Dictionary-based conversion of a piece of text.
	 *
	 * HTML tags, HTML entities and parser placeholders are left alone,
	 * except that 'title' and 'alt' attributes of tags are converted.
	 * Manual -{...}- rules are NOT interpreted here; see convertTo().
	 *
	 * @param $text String: the text to be converted
	 * @param $toVariant bool|string: the target variant; when false the
	 *   preferred variant is used
	 * @return String the converted text
	 */
	public function autoConvert( $text, $toVariant = false ) {
		wfProfileIn( __METHOD__ );

		$this->loadTables();

		if ( !$toVariant ) {
			$toVariant = $this->getPreferredVariant();
			if ( !$toVariant ) {
				wfProfileOut( __METHOD__ );
				return $text;
			}
		}

		if ( $this->guessVariant( $text, $toVariant ) ) {
			wfProfileOut( __METHOD__ );
			return $text;
		}

		/* we convert everything except:
		   1. HTML markups (anything between < and >)
		   2. HTML entities
		   3. placeholders created by the parser
		*/
		global $wgParser;
		if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
			$marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
		} else {
			$marker = '';
		}

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>]+$|^[^<>]*>';

		// disable convert to variants between <code></code> tags
		$codefix = '<code>.+?<\/code>|';
		// disable conversion of <script type="text/javascript"> ... </script>
		$scriptfix = '<script.*?>.*?<\/script>|';
		// disable conversion of <pre xxxx> ... </pre>
		$prefix = '<pre.*?>.*?<\/pre>|';

		$reg = '/' . $codefix . $scriptfix . $prefix .
			'<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
		$startPos = 0;
		$sourceBlob = '';
		$literalBlob = '';

		// Guard against delimiter nulls in the input
		$text = str_replace( "\000", '', $text );

		$markupMatches = null;
		$elementMatches = null;
		while ( $startPos < strlen( $text ) ) {
			if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
				$elementPos = $markupMatches[0][1];
				$element = $markupMatches[0][0];
			} else {
				$elementPos = strlen( $text );
				$element = '';
			}

			// Queue the part before the markup for translation in a batch
			$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

			// Advance to the next position
			$startPos = $elementPos + strlen( $element );

			// Translate any alt or title attributes inside the matched element
			if ( $element !== '' && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element,
				$elementMatches ) )
			{
				$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
				$changed = false;
				foreach ( array( 'title', 'alt' ) as $attrName ) {
					if ( !isset( $attrs[$attrName] ) ) {
						continue;
					}
					$attr = $attrs[$attrName];
					// Don't convert URLs.
					// Note: a value that *starts* with '://' (offset 0) is
					// still converted, because strpos() returns 0 there.
					if ( !strpos( $attr, '://' ) ) {
						$attr = $this->convertTo( $attr, $toVariant );
					}

					// Remove HTML tags to avoid disrupting the layout
					$attr = preg_replace( '/<[^>]+>/', '', $attr );
					if ( $attr !== $attrs[$attrName] ) {
						$attrs[$attrName] = $attr;
						$changed = true;
					}
				}
				if ( $changed ) {
					$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
						$elementMatches[3];
				}
			}
			$literalBlob .= $element . "\000";
		}

		// Do the main translation batch
		$translatedBlob = $this->translate( $sourceBlob, $toVariant );

		// Put the output back together
		$translatedIter = StringUtils::explode( "\000", $translatedBlob );
		$literalIter = StringUtils::explode( "\000", $literalBlob );
		$output = '';
		while ( $translatedIter->valid() && $literalIter->valid() ) {
			$output .= $translatedIter->current();
			$output .= $literalIter->current();
			$translatedIter->next();
			$literalIter->next();
		}

		wfProfileOut( __METHOD__ );
		return $output;
	}

	/**
	 * Translate a string to a variant using that variant's conversion
	 * table. Does no parsing: the string must not contain tags or
	 * manual conversion markup.
	 *
	 * @param $text String: the text to translate
	 * @param $variant String: the variant whose table is applied
	 * @return String the translated text
	 */
	public function translate( $text, $variant ) {
		wfProfileIn( __METHOD__ );
		// If $text is empty or only includes spaces, do nothing
		// Otherwise translate it
		if ( trim( $text ) ) {
			$this->loadTables();
			$text = $this->mTables[$variant]->replace( $text );
		}
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Translate a text into every enabled variant.
	 *
	 * @param $text String: the text to convert
	 * @return Array map of variant code => converted text
	 */
	public function autoConvertToAllVariants( $text ) {
		wfProfileIn( __METHOD__ );
		$this->loadTables();

		$ret = array();
		foreach ( $this->mVariants as $variant ) {
			$ret[$variant] = $this->translate( $text, $variant );
		}

		wfProfileOut( __METHOD__ );
		return $ret;
	}

	/**
	 * Convert link text to all enabled variants.
	 * Currently an alias of autoConvertToAllVariants().
	 *
	 * @param $text String: the text to convert
	 * @return Array map of variant code => converted text
	 */
	public function convertLinkToAllVariants( $text ) {
		return $this->autoConvertToAllVariants( $text );
	}

	/**
	 * Apply a parsed manual conversion rule: record its title (if any)
	 * and add/remove its mappings to/from the loaded tables.
	 *
	 * @param $convRule ConverterRule: an already parsed rule
	 */
	protected function applyManualConv( $convRule ) {
		// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
		// title conversion.
		// Bug 24072: $mConvRuleTitle was overwritten by other manual
		// rule(s) not for title, this breaks the title conversion.
		$newConvRuleTitle = $convRule->getTitle();
		if ( $newConvRuleTitle ) {
			// So I add an empty check for getTitle()
			$this->mConvRuleTitle = $newConvRuleTitle;
		}

		// merge/remove manual conversion rules to/from global table
		$convTable = $convRule->getConvTable();
		$action = $convRule->getRulesAction();
		foreach ( $convTable as $variant => $pair ) {
			if ( !$this->validateVariant( $variant ) ) {
				continue;
			}

			if ( $action == 'add' ) {
				foreach ( $pair as $from => $to ) {
					// to ensure that $from and $to not be left blank
					// so $this->translate() could always return a string
					if ( $from || $to ) {
						// more efficient than array_merge(), about 2.5 times.
						$this->mTables[$variant]->setPair( $from, $to );
					}
				}
			} elseif ( $action == 'remove' ) {
				$this->mTables[$variant]->removeArray( $pair );
			}
		}
	}

	/**
	 * Auto-convert a Title object to a readable string in the
	 * preferred variant, prefixed with the converted namespace name for
	 * titles outside the main namespace.
	 *
	 * @param $title Title: the title to convert
	 * @return String the converted title text
	 */
	public function convertTitle( $title ) {
		$variant = $this->getPreferredVariant();
		$index = $title->getNamespace();
		if ( $index !== NS_MAIN ) {
			$text = $this->convertNamespace( $index ) . ':';
		} else {
			$text = '';
		}
		$text .= $this->translate( $title->getText(), $variant );
		return $text;
	}

	/**
	 * Get the namespace display name in the preferred variant.
	 *
	 * @param $index int: namespace id
	 * @return String namespace name for display ('' for NS_MAIN)
	 */
	public function convertNamespace( $index ) {
		$variant = $this->getPreferredVariant();
		if ( $index === NS_MAIN ) {
			return '';
		} else {
			// First check if a message gives a converted name in the target variant.
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
			if ( $nsConvMsg->exists() ) {
				return $nsConvMsg->plain();
			}
			// Then check if a message gives a converted name in content language
			// which needs extra translation to the target variant.
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
			if ( $nsConvMsg->exists() ) {
				return $this->translate( $nsConvMsg->plain(), $variant );
			}
			// No message exists, retrieve it from the target variant's namespace names.
			$langObj = $this->mLangObj->factory( $variant );
			return $langObj->getFormattedNsText( $index );
		}
	}

	/**
	 * Convert text to the preferred variant, interpreting manual
	 * -{...}- conversion rules. See convertTo().
	 *
	 * @param $text String: text to be converted
	 * @return String converted text
	 */
	public function convert( $text ) {
		$variant = $this->getPreferredVariant();
		return $this->convertTo( $text, $variant );
	}

	/**
	 * Same as convert() except a target variant is given explicitly.
	 * Returns the text untouched when $wgDisableLangConversion is set.
	 *
	 * @param $text String: text to be converted
	 * @param $variant String: the target variant code
	 * @return String converted text
	 */
	public function convertTo( $text, $variant ) {
		global $wgDisableLangConversion;
		if ( $wgDisableLangConversion ) {
			return $text;
		}
		return $this->recursiveConvertTopLevel( $text, $variant );
	}

	/**
	 * Recursively convert text on the outside of -{...}- markup;
	 * the markup itself is handed to recursiveConvertRule().
	 *
	 * @param $text String: text to be converted
	 * @param $variant String: the target variant code
	 * @param $depth Integer: depth of recursion
	 * @return String converted text
	 */
	protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
		$startPos = 0;
		$out = '';
		$length = strlen( $text );
		$shouldConvert = !$this->guessVariant( $text, $variant );

		while ( $startPos < $length ) {
			$pos = strpos( $text, '-{', $startPos );

			if ( $pos === false ) {
				// No more markup, append final segment
				$fragment = substr( $text, $startPos );
				$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
				return $out;
			}

			// Markup found
			// Append initial segment
			$fragment = substr( $text, $startPos, $pos - $startPos );
			$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;

			// Advance position
			$startPos = $pos;

			// Do recursive conversion
			$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
		}

		return $out;
	}

	/**
	 * Recursively convert text on the inside of one -{...}- rule.
	 *
	 * @param $text String: text to be converted
	 * @param $variant String: the target variant code
	 * @param $startPos int: offset of the opening '-{'; updated by
	 *   reference to the offset just past the processed rule
	 * @param $depth Integer: depth of recursion
	 *
	 * @throws MWException if $startPos does not point at '-{'
	 * @return String converted text
	 */
	protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
		// Quick sanity check (no function calls)
		if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
			throw new MWException( __METHOD__ . ': invalid input string' );
		}

		$startPos += 2;
		$inner = '';
		$warningDone = false;
		$length = strlen( $text );

		while ( $startPos < $length ) {
			$m = false;
			preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
			if ( !$m ) {
				// Unclosed rule
				break;
			}

			$token = $m[0][0];
			$pos = $m[0][1];

			// Markup found
			// Append initial segment
			$inner .= substr( $text, $startPos, $pos - $startPos );

			// Advance position
			$startPos = $pos;

			switch ( $token ) {
				case '-{':
					// Check max depth
					if ( $depth >= $this->mMaxDepth ) {
						$inner .= '-{';
						if ( !$warningDone ) {
							$inner .= '<span class="error">' .
								wfMessage( 'language-converter-depth-warning' )
									->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
								'</span>';
							$warningDone = true;
						}
						$startPos += 2;
						// Target the enclosing while loop explicitly: a bare
						// "continue" inside switch acts like "break" and
						// triggers a warning on PHP >= 7.3.
						continue 2;
					}
					// Recursively parse another rule
					$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
					break;
				case '}-':
					// Apply the rule
					$startPos += 2;
					$rule = new ConverterRule( $inner, $this );
					$rule->parse( $variant );
					$this->applyManualConv( $rule );
					return $rule->getDisplay();
				default:
					throw new MWException( __METHOD__ . ': invalid regex match' );
			}
		}

		// Unclosed rule
		if ( $startPos < $length ) {
			$inner .= substr( $text, $startPos );
		}
		$startPos = $length;
		return '-{' . $this->autoConvert( $inner, $variant );
	}

	/**
	 * If a link target does not exist, look for an existing page under
	 * one of its variant spellings and, if found, point the link there.
	 *
	 * @param $link String: the name of the link; replaced by reference
	 *   with the text of the first existing variant title
	 * @param $nt Mixed: the Title object of the link; replaced by
	 *   reference with the first existing variant Title
	 * @param $ignoreOtherCond Boolean: when true, skip the request/user
	 *   based opt-outs (redirect=no, action=edit/submit, linkconvert=no,
	 *   'noconvertlink' user option)
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		# If the article has already existed, there is no need to
		# check it again, otherwise it may cause a fault.
		if ( is_object( $nt ) && $nt->exists() ) {
			return;
		}

		global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest,
			$wgUser;
		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
		$disableLinkConversion = $wgDisableLangConversion
			|| $wgDisableTitleConversion;

		$ns = NS_MAIN;

		if ( $disableLinkConversion ||
			( !$ignoreOtherCond &&
				( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no'
				|| $wgUser->getOption( 'noconvertlink' ) == 1 ) ) ) {
			return;
		}

		if ( is_object( $nt ) ) {
			$ns = $nt->getNamespace();
		}

		$variants = $this->autoConvertToAllVariants( $link );
		if ( !$variants ) { // give up
			return;
		}

		$titles = array();
		// Only build the batch once we know there is something to look up
		$linkBatch = new LinkBatch();

		foreach ( $variants as $v ) {
			if ( $v != $link ) {
				$varnt = Title::newFromText( $v, $ns );
				if ( !is_null( $varnt ) ) {
					$linkBatch->addObj( $varnt );
					$titles[] = $varnt;
				}
			}
		}

		// fetch all variants in single query
		$linkBatch->execute();

		foreach ( $titles as $varnt ) {
			if ( $varnt->getArticleID() > 0 ) {
				$nt = $varnt;
				$link = $varnt->getText();
				break;
			}
		}
	}

	/**
	 * Returns a language-specific hash suffix: '!' followed by the
	 * preferred variant.
	 *
	 * @return string
	 */
	public function getExtraHashOptions() {
		$variant = $this->getPreferredVariant();
		return '!' . $variant;
	}

	/**
	 * Guess whether a text is already written in the given variant.
	 * autoConvert() and recursiveConvertTopLevel() skip conversion when
	 * this returns true. The base implementation always returns false;
	 * subclasses may override it.
	 *
	 * @param $text string: the text to be checked
	 * @param $variant string: language code of the variant to be checked for
	 * @return bool true if $text appears to be written in $variant
	 */
	public function guessVariant( $text, $variant ) {
		return false;
	}

	/**
	 * Load the default conversion tables into $mTables.
	 * Must be implemented by subclasses.
	 *
	 * @throws MWException always, in this base implementation
	 */
	public function loadDefaultTables() {
		$name = get_class( $this );
		throw new MWException( "Must implement loadDefaultTables() method in class $name" );
	}

	/**
	 * Load the conversion tables, from cache when possible, otherwise
	 * from the defaults plus the on-wiki Conversiontable pages; freshly
	 * built tables are written back to the cache.
	 *
	 * @param $fromCache Boolean: whether to try loading from cache first
	 */
	public function loadTables( $fromCache = true ) {
		global $wgLangConvMemc;

		if ( $this->mTablesLoaded ) {
			return;
		}

		wfProfileIn( __METHOD__ );
		$this->mTablesLoaded = true;
		$this->mTables = false;
		if ( $fromCache ) {
			wfProfileIn( __METHOD__ . '-cache' );
			$this->mTables = $wgLangConvMemc->get( $this->mCacheKey );
			wfProfileOut( __METHOD__ . '-cache' );
		}
		if ( !$this->mTables
			|| !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
			wfProfileIn( __METHOD__ . '-recache' );
			// not in cache, or we need a fresh reload.
			// We will first load the default tables
			// then update them using things in MediaWiki:Conversiontable/*
			$this->loadDefaultTables();
			foreach ( $this->mVariants as $var ) {
				$cached = $this->parseCachedTable( $var );
				$this->mTables[$var]->mergeArray( $cached );
			}

			$this->postLoadTables();
			$this->mTables[self::CACHE_VERSION_KEY] = true;

			$wgLangConvMemc->set( $this->mCacheKey, $this->mTables, 43200 );
			wfProfileOut( __METHOD__ . '-recache' );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Hook for subclasses to post-process the tables after they have
	 * been rebuilt by loadTables(). Does nothing by default.
	 */
	public function postLoadTables() { }

	/**
	 * Discard the loaded tables and rebuild them, bypassing the cache.
	 */
	public function reloadTables() {
		// loadTables() re-initialises $mTables itself; clear it here
		// without unsetting the declared property.
		$this->mTables = null;
		$this->mTablesLoaded = false;
		$this->loadTables( false );
	}

	/**
	 * Parse the conversion table stored on-wiki for a variant.
	 *
	 * With no subpage the text comes from the 'conversiontable' message
	 * for $code; otherwise it is read from the page
	 * MediaWiki:Conversiontable/$code/$subpage. Mappings are written as
	 *   -{ from => to ; from => to // comment ; ... }-
	 * and subpages linked as [[MediaWiki:Conversiontable/$code/...]] are
	 * parsed recursively. Each page is parsed at most once per request;
	 * a repeated call for the same page returns an empty array.
	 *
	 * @param $code String: language code
	 * @param $subpage String: subpage name
	 * @param $recursive Boolean: parse subpages recursively
	 *
	 * @return Array map of source text => target text
	 */
	public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
		static $parsed = array();

		$key = 'Conversiontable/' . $code;
		if ( $subpage ) {
			$key .= '/' . $subpage;
		}
		if ( array_key_exists( $key, $parsed ) ) {
			return array();
		}

		$parsed[$key] = true;

		if ( $subpage === '' ) {
			$txt = MessageCache::singleton()->get( 'conversiontable', true, $code );
		} else {
			$txt = false;
			$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
			if ( $title && $title->exists() ) {
				$revision = Revision::newFromTitle( $title );
				if ( $revision ) {
					if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
						$txt = $revision->getContent( Revision::RAW )->getNativeData();
					}

					//@todo: in the future, use a specialized content model, perhaps based on json!
				}
			}
		}

		# Nothing to parse if there's no text
		if ( $txt === false || $txt === null || $txt === '' ) {
			return array();
		}

		// get all subpage links of the form
		// [[MediaWiki:Conversiontable/zh-xx/...|...]]
		$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
			':Conversiontable';
		$subs = StringUtils::explode( '[[', $txt );
		$sublinks = array();
		foreach ( $subs as $sub ) {
			$link = explode( ']]', $sub, 2 );
			if ( count( $link ) != 2 ) {
				continue;
			}
			$b = explode( '|', $link[0], 2 );
			$b = explode( '/', trim( $b[0] ), 3 );
			if ( count( $b ) == 3 ) {
				$sublink = $b[2];
			} else {
				$sublink = '';
			}

			if ( $b[0] == $linkhead && $b[1] == $code ) {
				$sublinks[] = $sublink;
			}
		}

		// parse the mappings in this page
		$blocks = StringUtils::explode( '-{', $txt );
		$ret = array();
		$first = true;
		foreach ( $blocks as $block ) {
			if ( $first ) {
				// Skip the part before the first -{
				$first = false;
				continue;
			}
			$mappings = explode( '}-', $block, 2 );
			$stripped = str_replace( array( "'", '"', '*', '#' ), '',
				$mappings[0] );
			$table = StringUtils::explode( ';', $stripped );
			foreach ( $table as $t ) {
				$m = explode( '=>', $t, 3 );
				if ( count( $m ) != 2 ) {
					continue;
				}
				// trim any trailing comments starting with '//'
				$tt = explode( '//', $m[1], 2 );
				$ret[trim( $m[0] )] = trim( $tt[0] );
			}
		}

		// recursively parse the subpages
		if ( $recursive ) {
			foreach ( $sublinks as $link ) {
				$s = $this->parseCachedTable( $code, $link, $recursive );
				$ret = array_merge( $ret, $s );
			}
		}

		if ( $this->mUcfirst ) {
			foreach ( $ret as $k => $v ) {
				$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
			}
		}
		return $ret;
	}

	/**
	 * Enclose a string in "-{R|" and "}-" so that it is output raw and
	 * not converted. Text that already contains conversion markup is
	 * returned unchanged.
	 *
	 * @param $text String: text to be tagged for no conversion
	 * @param $noParse Boolean: unused
	 * @return String the tagged text
	 */
	public function markNoConversion( $text, $noParse = false ) {
		# don't mark if already marked
		# (compare against false: a marker at offset 0 must count too)
		if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
			return $text;
		}

		$ret = "-{R|$text}-";
		return $ret;
	}

	/**
	 * Convert the sorting key for category links. The base
	 * implementation returns the key unchanged.
	 *
	 * @param $key String
	 * @return String
	 */
	public function convertCategoryKey( $key ) {
		return $key;
	}

	/**
	 * Page-save handler: when a MediaWiki:Conversiontable/<variant>[/...]
	 * page for an enabled variant is saved, rebuild the conversion tables.
	 * Only $page is used; the remaining parameters are accepted for the
	 * handler signature.
	 *
	 * @param $page WikiPage object
	 * @param $user Object: User object for the current user
	 * @param $content Content: new page content
	 * @param $summary String: edit summary of the edit
	 * @param $isMinor Boolean: was the edit marked as minor?
	 * @param $isWatch Boolean: did the user watch this page or not?
	 * @param $section
	 * @param $flags int Bitfield
	 * @param $revision Object: new Revision object or null
	 * @return Boolean: always true
	 */
	public function OnPageContentSaveComplete( $page, $user, $content, $summary, $isMinor,
		$isWatch, $section, $flags, $revision ) {
		$titleobj = $page->getTitle();
		if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
			$title = $titleobj->getDBkey();
			$t = explode( '/', $title, 3 );
			$c = count( $t );
			if ( $c > 1 && $t[0] == 'Conversiontable' ) {
				if ( $this->validateVariant( $t[1] ) ) {
					$this->reloadTables();
				}
			}
		}
		return true;
	}

	/**
	 * Armour rendered math against conversion by escaping the braces of
	 * any conversion markers it contains.
	 *
	 * @param $text String: text to armour against conversion
	 * @return String armoured text
	 */
	public function armourMath( $text ) {
		// convert '-{' and '}-' to '-&#123;' and '&#125;-' to prevent
		// any unwanted markup appearing in the math image tag.
		$text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
		return $text;
	}

	/**
	 * Get the regex used by ConverterRule::parseRules() to split a rule
	 * string into its per-variant parts. Built once and memoized in
	 * $mVarSeparatorPattern.
	 *
	 * @return String a PCRE pattern for preg_split()
	 */
	public function getVarSeparatorPattern() {
		if ( is_null( $this->mVarSeparatorPattern ) ) {
			// varsep_pattern for preg_split:
			// text should be split by ";" only if a valid variant
			// name exists after the markup, for example:
			//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
			//  <span style="font-size:120%;">yyy</span>;}-
			// we should split it as:
			//  array(
			//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
			//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
			//    [2] => ''
			//  )
			$pat = '/;\s*(?=';
			foreach ( $this->mVariants as $variant ) {
				// Variant codes are matched literally
				$quoted = preg_quote( $variant, '/' );
				// zh-hans:xxx;zh-hant:yyy
				$pat .= $quoted . '\s*:|';
				// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
				$pat .= '[^;]*?=>\s*' . $quoted . '\s*:|';
			}
			$pat .= '\s*$)/';
			$this->mVarSeparatorPattern = $pat;
		}
		return $this->mVarSeparatorPattern;
	}
}

/**
 * Parser for a single manual conversion rule, i.e. the text found
 * between "-{" and "}-".
 */
class ConverterRule {
	public $mText; // original text in -{text}-
	public $mConverter; // LanguageConverter object
	public $mRuleDisplay = '';
	public $mRuleTitle = false;
	public $mRules = '';// string : the text of the rules
	public $mRulesAction = 'none';
	public $mFlags = array();
	public $mVariantFlags = array();
	public $mConvTable = array();
	public $mBidtable = array();// array of the translation in each variant
	public $mUnidtable = array();// array of the translation in each variant

	/**
	 * Constructor
	 *
	 * @param $text String: the text between -{ and }-
	 * @param $converter LanguageConverter object
	 */
	public function __construct( $text, $converter ) {
		$this->mText = $text;
		$this->mConverter = $converter;
	}

	/**
	 * Look up the text for the first of the given variants that has an
	 * entry in the bidirectional table.
	 *
	 * @param $variants array|string: variant code(s), in priority order
	 * @return String|bool the text found, or false if none matched
	 */
	public function getTextInBidtable( $variants ) {
		$variants = (array)$variants;
		if ( !$variants ) {
			return false;
		}
		foreach ( $variants as $variant ) {
			if ( isset( $this->mBidtable[$variant] ) ) {
				return $this->mBidtable[$variant];
			}
		}
		return false;
	}

	/**
	 * Split $mText into flags (before the first '|') and rules, then
	 * normalise the flag set. Results are stored in $mFlags,
	 * $mVariantFlags and $mRules.
	 */
	public function parseFlags() {
		$text = $this->mText;
		$flags = array();
		$variantFlags = array();

		$sepPos = strpos( $text, '|' );
		if ( $sepPos !== false ) {
			$validFlags = $this->mConverter->mFlags;
			$f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
			foreach ( $f as $ff ) {
				$ff = trim( $ff );
				if ( isset( $validFlags[$ff] ) ) {
					$flags[$validFlags[$ff]] = true;
				}
			}
			$text = strval( substr( $text, $sepPos + 1 ) );
		}

		if ( !$flags ) {
			$flags['S'] = true;
		} elseif ( isset( $flags['R'] ) ) {
			$flags = array( 'R' => true );// remove other flags
		} elseif ( isset( $flags['N'] ) ) {
			$flags = array( 'N' => true );// remove other flags
		} elseif ( isset( $flags['-'] ) ) {
			$flags = array( '-' => true );// remove other flags
		} elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
			$flags['H'] = true;
		} elseif ( isset( $flags['H'] ) ) {
			// replace A flag, and remove other flags except T
			$temp = array( '+' => true, 'H' => true );
			if ( isset( $flags['T'] ) ) {
				$temp['T'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				$temp['D'] = true;
			}
			$flags = $temp;
		} else {
			if ( isset( $flags['A'] ) ) {
				$flags['+'] = true;
				$flags['S'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				unset( $flags['S'] );
			}
			// try to find flags like "zh-hans", "zh-hant"
			// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
			$variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
			if ( $variantFlags ) {
				$variantFlags = array_flip( $variantFlags );
				$flags = array();
			}
		}
		$this->mVariantFlags = $variantFlags;
		$this->mRules = $text;
		$this->mFlags = $flags;
	}

	/**
	 * Parse $mRules into the bidirectional table (variant => text) and
	 * the unidirectional table (variant => array( from => to )).
	 * If any part names an unknown variant, both tables are emptied.
	 */
	public function parseRules() {
		$rules = $this->mRules;
		$bidtable = array();
		$unidtable = array();
		$variants = $this->mConverter->mVariants;
		$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

		$choice = preg_split( $varsep_pattern, $rules );

		foreach ( $choice as $c ) {
			$v = explode( ':', $c, 2 );
			if ( count( $v ) != 2 ) {
				// syntax error, skip
				continue;
			}
			$to = trim( $v[1] );
			$v = trim( $v[0] );
			$u = explode( '=>', $v, 2 );
			// if $to is empty, strtr() could return a wrong result
			if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) {
				$bidtable[$v] = $to;
			} elseif ( count( $u ) == 2 ) {
				$from = trim( $u[0] );
				$v = trim( $u[1] );
				if ( array_key_exists( $v, $unidtable )
					&& !is_array( $unidtable[$v] )
					&& $to
					&& in_array( $v, $variants ) ) {
					$unidtable[$v] = array( $from => $to );
				} elseif ( $to && in_array( $v, $variants ) ) {
					$unidtable[$v][$from] = $to;
				}
			}
			// syntax error, pass
			if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
				$bidtable = array();
				$unidtable = array();
				break;
			}
		}
		$this->mBidtable = $bidtable;
		$this->mUnidtable = $unidtable;
	}

	/**
	 * Build a human-readable description of the parsed rules, using
	 * the converter's variant names and description separators.
	 *
	 * @return string
	 */
	public function getRulesDesc() {
		$codesep = $this->mConverter->mDescCodeSep;
		$varsep = $this->mConverter->mDescVarSep;
		$text = '';
		foreach ( $this->mBidtable as $k => $v ) {
			$text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
		}
		foreach ( $this->mUnidtable as $k => $a ) {
			foreach ( $a as $from => $to ) {
				$text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
					"$codesep$to$varsep";
			}
		}
		return $text;
	}

	/**
	 * Pick the text to display for a variant from the parsed rules.
	 *
	 * @param $variant String: the target variant
	 * @return String|bool the text to display; the raw rules text when
	 *   no rule was parsed; a falsy value when nothing matched
	 */
	public function getRuleConvertedStr( $variant ) {
		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;

		if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
			return $this->mRules;
		} else {
			// display current variant in bidirectional array
			$disp = $this->getTextInBidtable( $variant );
			// or display current variant in fallbacks
			if ( !$disp ) {
				$disp = $this->getTextInBidtable(
					$this->mConverter->getVariantFallbacks( $variant ) );
			}
			// or display current variant in unidirectional array
			if ( !$disp && array_key_exists( $variant, $unidtable ) ) {
				$disp = array_values( $unidtable[$variant] );
				$disp = $disp[0];
			}
			// or display first text under disable manual convert
			if ( !$disp
				&& $this->mConverter->mManualLevel[$variant] == 'disable' ) {
				if ( count( $bidtable ) > 0 ) {
					$disp = array_values( $bidtable );
					$disp = $disp[0];
				} else {
					$disp = array_values( $unidtable );
					$disp = array_values( $disp[0] );
					$disp = $disp[0];
				}
			}
			return $disp;
		}
	}

	/**
	 * Build $mConvTable (variant => array( from => to )) from the
	 * bidirectional and unidirectional tables, honouring each variant's
	 * manual conversion level.
	 */
	public function generateConvTable() {
		// Special case optimisation
		if ( !$this->mBidtable && !$this->mUnidtable ) {
			$this->mConvTable = array();
			return;
		}

		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;
		$manLevel = $this->mConverter->mManualLevel;

		$vmarked = array();
		foreach ( $this->mConverter->mVariants as $v ) {
			/* for bidirectional array
				fill in the missing variants, if any,
				with fallbacks */
			if ( !isset( $bidtable[$v] ) ) {
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $v );
				$vf = $this->getTextInBidtable( $variantFallbacks );
				if ( $vf ) {
					$bidtable[$v] = $vf;
				}
			}

			if ( isset( $bidtable[$v] ) ) {
				foreach ( $vmarked as $vo ) {
					// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
					// or -{H|zh:WordZh;zh-tw:WordTw}-
					// or -{-|zh:WordZh;zh-tw:WordTw}-
					// to introduce a custom mapping between
					// words WordZh and WordTw in the whole text
					if ( $manLevel[$v] == 'bidirectional' ) {
						$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
					}
					if ( $manLevel[$vo] == 'bidirectional' ) {
						$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
					}
				}
				$vmarked[] = $v;
			}
			/* for unidirectional array fill to convert tables */
			if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
				&& isset( $unidtable[$v] ) )
			{
				if ( isset( $this->mConvTable[$v] ) ) {
					$this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
				} else {
					$this->mConvTable[$v] = $unidtable[$v];
				}
			}
		}
	}

	/**
	 * Parse the rule: flags, rules, display text, title and conversion
	 * table. Results are available through the get*() accessors.
	 *
	 * @param $variant String|null: the target variant; when empty the
	 *   converter's preferred variant is used
	 */
	public function parse( $variant = null ) {
		if ( !$variant ) {
			$variant = $this->mConverter->getPreferredVariant();
		}

		$this->parseFlags();
		$flags = $this->mFlags;

		// convert to specified variant
		// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
		if ( $this->mVariantFlags ) {
			// check if current variant in flags
			if ( isset( $this->mVariantFlags[$variant] ) ) {
				// then convert <text to convert> to current language
				$this->mRules = $this->mConverter->autoConvert( $this->mRules,
					$variant );
			} else { // if current variant no in flags,
				// then we check its fallback variants.
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $variant );
				if ( is_array( $variantFallbacks ) ) {
					foreach ( $variantFallbacks as $variantFallback ) {
						// if current variant's fallback exist in flags
						if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
							// then convert <text to convert> to fallback language
							$this->mRules =
								$this->mConverter->autoConvert( $this->mRules,
									$variantFallback );
							break;
						}
					}
				}
			}
			$this->mFlags = $flags = array( 'R' => true );
		}

		if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
			// decode => HTML entities modified by Sanitizer::removeHTMLtags
			$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
			$this->parseRules();
		}
		$rules = $this->mRules;

		if ( !$this->mBidtable && !$this->mUnidtable ) {
			if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
				// fill all variants if text in -{A/H/-|text} without rules
				foreach ( $this->mConverter->mVariants as $v ) {
					$this->mBidtable[$v] = $rules;
				}
			} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
				$this->mFlags = $flags = array( 'R' => true );
			}
		}

		// false means "no flag produced a display value" (error case below)
		$this->mRuleDisplay = false;
		foreach ( $flags as $flag => $unused ) {
			switch ( $flag ) {
				case 'R':
					// if we don't do content convert, still strip the -{}- tags
					$this->mRuleDisplay = $rules;
					break;
				case 'N':
					// process N flag: output current variant name
					$ruleVar = trim( $rules );
					if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
						$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
					} else {
						$this->mRuleDisplay = '';
					}
					break;
				case 'D':
					// process D flag: output rules description
					$this->mRuleDisplay = $this->getRulesDesc();
					break;
				case 'H':
					// process H,- flag or T only: output nothing
					$this->mRuleDisplay = '';
					break;
				case '-':
					$this->mRulesAction = 'remove';
					$this->mRuleDisplay = '';
					break;
				case '+':
					$this->mRulesAction = 'add';
					$this->mRuleDisplay = '';
					break;
				case 'S':
					$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
					break;
				case 'T':
					$this->mRuleTitle = $this->getRuleConvertedStr( $variant );
					$this->mRuleDisplay = '';
					break;
				default:
					// ignore unknown flags (but see error case below)
			}
		}
		if ( $this->mRuleDisplay === false ) {
			$this->mRuleDisplay = '<span class="error">'
				. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
				. '</span>';
		}

		$this->generateConvTable();
	}

	/**
	 * Not implemented yet; currently returns nothing (null).
	 *
	 * @todo Implement.
	 */
	public function hasRules() {
		// TODO:
	}

	/**
	 * Get the text to display in place of the rule.
	 *
	 * @return string
	 */
	public function getDisplay() {
		return $this->mRuleDisplay;
	}

	/**
	 * Get the converted title set by a 'T' rule.
	 *
	 * @return string|bool the title, or false if the rule set none
	 */
	public function getTitle() {
		return $this->mRuleTitle;
	}

	/**
	 * Get how the rule's table should be applied.
	 *
	 * @return string 'add', 'remove' or 'none'
	 */
	public function getRulesAction() {
		return $this->mRulesAction;
	}

	/**
	 * Get the conversion table built by generateConvTable().
	 *
	 * @return array map of variant => array( from => to )
	 */
	public function getConvTable() {
		return $this->mConvTable;
	}

	/**
	 * Get the rules text (the part after the flags).
	 *
	 * @return string
	 */
	public function getRules() {
		return $this->mRules;
	}

	/**
	 * Get the normalised flags of the rule.
	 *
	 * @return array map of flag => true
	 */
	public function getFlags() {
		return $this->mFlags;
	}
}
01479 $this->mRuleDisplay = $rules; 01480 break; 01481 case 'N': 01482 // process N flag: output current variant name 01483 $ruleVar = trim( $rules ); 01484 if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) { 01485 $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar]; 01486 } else { 01487 $this->mRuleDisplay = ''; 01488 } 01489 break; 01490 case 'D': 01491 // process D flag: output rules description 01492 $this->mRuleDisplay = $this->getRulesDesc(); 01493 break; 01494 case 'H': 01495 // process H,- flag or T only: output nothing 01496 $this->mRuleDisplay = ''; 01497 break; 01498 case '-': 01499 $this->mRulesAction = 'remove'; 01500 $this->mRuleDisplay = ''; 01501 break; 01502 case '+': 01503 $this->mRulesAction = 'add'; 01504 $this->mRuleDisplay = ''; 01505 break; 01506 case 'S': 01507 $this->mRuleDisplay = $this->getRuleConvertedStr( $variant ); 01508 break; 01509 case 'T': 01510 $this->mRuleTitle = $this->getRuleConvertedStr( $variant ); 01511 $this->mRuleDisplay = ''; 01512 break; 01513 default: 01514 // ignore unknown flags (but see error case below) 01515 } 01516 } 01517 if ( $this->mRuleDisplay === false ) { 01518 $this->mRuleDisplay = '<span class="error">' 01519 . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped() 01520 . '</span>'; 01521 } 01522 01523 $this->generateConvTable(); 01524 } 01525 01529 public function hasRules() { 01530 // TODO: 01531 } 01532 01537 public function getDisplay() { 01538 return $this->mRuleDisplay; 01539 } 01540 01545 public function getTitle() { 01546 return $this->mRuleTitle; 01547 } 01548 01553 public function getRulesAction() { 01554 return $this->mRulesAction; 01555 } 01556 01562 public function getConvTable() { 01563 return $this->mConvTable; 01564 } 01565 01570 public function getRules() { 01571 return $this->mRules; 01572 } 01573 01578 public function getFlags() { 01579 return $this->mFlags; 01580 } 01581 }