MediaWiki
master
|
00001 <?php 00015 class CLDRPluralRuleEvaluator { 00024 public static function evaluate( $number, array $rules ) { 00025 $rules = self::compile( $rules ); 00026 return self::evaluateCompiled( $number, $rules ); 00027 } 00028 00036 public static function compile( array $rules ) { 00037 // We can't use array_map() for this because it generates a warning if 00038 // there is an exception. 00039 foreach ( $rules as &$rule ) { 00040 $rule = CLDRPluralRuleConverter::convert( $rule ); 00041 } 00042 return $rules; 00043 } 00044 00049 public static function evaluateCompiled( $number, array $rules ) { 00050 // The compiled form is RPN, with tokens strictly delimited by 00051 // spaces, so this is a simple RPN evaluator. 00052 foreach ( $rules as $i => $rule ) { 00053 $stack = array(); 00054 $zero = ord( '0' ); 00055 $nine = ord( '9' ); 00056 foreach ( StringUtils::explode( ' ', $rule ) as $token ) { 00057 $ord = ord( $token ); 00058 if ( $token === 'n' ) { 00059 $stack[] = $number; 00060 } elseif ( $ord >= $zero && $ord <= $nine ) { 00061 $stack[] = intval( $token ); 00062 } else { 00063 $right = array_pop( $stack ); 00064 $left = array_pop( $stack ); 00065 $result = self::doOperation( $token, $left, $right ); 00066 $stack[] = $result; 00067 } 00068 } 00069 if ( $stack[0] ) { 00070 return $i; 00071 } 00072 } 00073 // None of the provided rules match. The number belongs to caregory 00074 // 'other' which comes last. 00075 return count( $rules ); 00076 } 00077 00087 private static function doOperation( $token, $left, $right ) { 00088 if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ) ) ) { 00089 if ( !($right instanceof CLDRPluralRuleEvaluator_Range ) ) { 00090 $right = new CLDRPluralRuleEvaluator_Range( $right ); 00091 } 00092 } 00093 switch ( $token ) { 00094 case 'or': 00095 return $left || $right; 00096 case 'and': 00097 return $left && $right; 00098 case 'is': 00099 return $left == $right; 00100 case 'is-not': 00101 return $left != $right; 00102 case 'in': 00103 return $right->isNumberIn( $left ); 00104 case 'not-in': 00105 return !$right->isNumberIn( $left ); 00106 case 'within': 00107 return $right->isNumberWithin( $left ); 00108 case 'not-within': 00109 return !$right->isNumberWithin( $left ); 00110 case 'mod': 00111 if ( is_int( $left ) ) { 00112 return (int) fmod( $left, $right ); 00113 } 00114 return fmod( $left, $right ); 00115 case ',': 00116 if ( $left instanceof CLDRPluralRuleEvaluator_Range ) { 00117 $range = $left; 00118 } else { 00119 $range = new CLDRPluralRuleEvaluator_Range( $left ); 00120 } 00121 $range->add( $right ); 00122 return $range; 00123 case '..': 00124 return new CLDRPluralRuleEvaluator_Range( $left, $right ); 00125 default: 00126 throw new CLDRPluralRuleError( "Invalid RPN token" ); 00127 } 00128 } 00129 } 00130 00134 class CLDRPluralRuleEvaluator_Range { 00135 public $parts = array(); 00136 00137 function __construct( $start, $end = false ) { 00138 if ( $end === false ) { 00139 $this->parts[] = $start; 00140 } else { 00141 $this->parts[] = array( $start, $end ); 00142 } 00143 } 00144 00150 function isNumberIn( $number, $integerConstraint = true ) { 00151 foreach ( $this->parts as $part ) { 00152 if ( is_array( $part ) ) { 00153 if ( ( !$integerConstraint || floor( $number ) === (float)$number ) 00154 && $number >= $part[0] && $number <= $part[1] ) 00155 { 00156 return true; 00157 } 00158 } else { 00159 if ( $number == $part ) { 00160 return true; 00161 } 00162 } 00163 } 00164 return false; 00165 } 00166 00171 function isNumberWithin( $number ) { 00172 return $this->isNumberIn( $number, false ); 00173 } 00174 00179 function add( $other ) { 00180 if ( $other instanceof self ) { 00181 $this->parts = array_merge( $this->parts, $other->parts ); 00182 } else { 00183 $this->parts[] = $other; 00184 } 00185 } 00186 00190 function __toString() { 00191 $s = 'Range('; 00192 foreach ( $this->parts as $i => $part ) { 00193 if ( $i ) { 00194 $s .= ', '; 00195 } 00196 if ( is_array( $part ) ) { 00197 $s .= $part[0] . '..' . $part[1]; 00198 } else { 00199 $s .= $part; 00200 } 00201 } 00202 $s .= ')'; 00203 return $s; 00204 } 00205 00206 } 00207 00211 class CLDRPluralRuleConverter { 00212 public $rule, $pos, $end; 00213 public $operators = array(); 00214 public $operands = array(); 00215 00221 static $precedence = array( 00222 'or' => 2, 00223 'and' => 3, 00224 'is' => 4, 00225 'is-not' => 4, 00226 'in' => 4, 00227 'not-in' => 4, 00228 'within' => 4, 00229 'not-within' => 4, 00230 'mod' => 5, 00231 ',' => 6, 00232 '..' => 7, 00233 ); 00234 00238 const WHITESPACE_CLASS = " \t\r\n"; 00239 00244 const NUMBER_CLASS = '0123456789'; 00245 00249 const WORD_REGEX = '/[a-zA-Z]+/A'; 00250 00254 public static function convert( $rule ) { 00255 $parser = new self( $rule ); 00256 return $parser->doConvert(); 00257 } 00258 00262 protected function __construct( $rule ) { 00263 $this->rule = $rule; 00264 $this->pos = 0; 00265 $this->end = strlen( $rule ); 00266 } 00267 00271 protected function doConvert() { 00272 $expectOperator = true; 00273 00274 // Iterate through all tokens, saving the operators and operands to a 00275 // stack per Dijkstra's shunting yard algorithm. 00276 while ( false !== ( $token = $this->nextToken() ) ) { 00277 // In this grammar, there are only binary operators, so every valid 00278 // rule string will alternate between operator and operand tokens. 00279 $expectOperator = !$expectOperator; 00280 00281 if ( $token instanceof CLDRPluralRuleConverter_Expression ) { 00282 // Operand 00283 if ( $expectOperator ) { 00284 $token->error( 'unexpected operand' ); 00285 } 00286 $this->operands[] = $token; 00287 continue; 00288 } else { 00289 // Operator 00290 if ( !$expectOperator ) { 00291 $token->error( 'unexpected operator' ); 00292 } 00293 // Resolve higher precedence levels 00294 $lastOp = end( $this->operators ); 00295 while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) { 00296 $this->doOperation( $lastOp, $this->operands ); 00297 array_pop( $this->operators ); 00298 $lastOp = end( $this->operators ); 00299 } 00300 $this->operators[] = $token; 00301 } 00302 } 00303 00304 // Finish off the stack 00305 while ( $op = array_pop( $this->operators ) ) { 00306 $this->doOperation( $op, $this->operands ); 00307 } 00308 00309 // Make sure the result is sane. The first case is possible for an empty 00310 // string input, the second should be unreachable. 00311 if ( !count( $this->operands ) ) { 00312 $this->error( 'condition expected' ); 00313 } elseif ( count( $this->operands ) > 1 ) { 00314 $this->error( 'missing operator or too many operands' ); 00315 } 00316 00317 $value = $this->operands[0]; 00318 if ( $value->type !== 'boolean' ) { 00319 $this->error( 'the result must have a boolean type' ); 00320 } 00321 00322 return $this->operands[0]->rpn; 00323 } 00324 00329 protected function nextToken() { 00330 if ( $this->pos >= $this->end ) { 00331 return false; 00332 } 00333 00334 // Whitespace 00335 $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos ); 00336 $this->pos += $length; 00337 00338 if ( $this->pos >= $this->end ) { 00339 return false; 00340 } 00341 00342 // Number 00343 $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos ); 00344 if ( $length !== 0 ) { 00345 $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos ); 00346 $this->pos += $length; 00347 return $token; 00348 } 00349 00350 // Comma 00351 if ( $this->rule[$this->pos] === ',' ) { 00352 $token = $this->newOperator( ',', $this->pos, 1 ); 00353 $this->pos ++; 00354 return $token; 00355 } 00356 00357 // Dot dot 00358 if ( substr( $this->rule, $this->pos, 2 ) === '..' ) { 00359 $token = $this->newOperator( '..', $this->pos, 2 ); 00360 $this->pos += 2; 00361 return $token; 00362 } 00363 00364 // Word 00365 if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) { 00366 $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' ); 00367 } 00368 $word1 = strtolower( $m[0] ); 00369 $word2 = ''; 00370 $nextTokenPos = $this->pos + strlen( $word1 ); 00371 if ( $word1 === 'not' || $word1 === 'is' ) { 00372 // Look ahead one word 00373 $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos ); 00374 if ( $nextTokenPos < $this->end 00375 && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) ) 00376 { 00377 $word2 = strtolower( $m[0] ); 00378 $nextTokenPos += strlen( $word2 ); 00379 } 00380 } 00381 00382 // Two-word operators like "is not" take precedence over single-word operators like "is" 00383 if ( $word2 !== '' ) { 00384 $bothWords = "{$word1}-{$word2}"; 00385 if ( isset( self::$precedence[$bothWords] ) ) { 00386 $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos ); 00387 $this->pos = $nextTokenPos; 00388 return $token; 00389 } 00390 } 00391 00392 // Single-word operators 00393 if ( isset( self::$precedence[$word1] ) ) { 00394 $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) ); 00395 $this->pos += strlen( $word1 ); 00396 return $token; 00397 } 00398 00399 // The special numerical keyword "n" 00400 if ( $word1 === 'n' ) { 00401 $token = $this->newNumber( 'n', $this->pos ); 00402 $this->pos ++; 00403 return $token; 00404 } 00405 00406 $this->error( 'unrecognised word' ); 00407 } 00408 00414 protected function doOperation( $op ) { 00415 if ( count( $this->operands ) < 2 ) { 00416 $op->error( 'missing operand' ); 00417 } 00418 $right = array_pop( $this->operands ); 00419 $left = array_pop( $this->operands ); 00420 $result = $op->operate( $left, $right ); 00421 $this->operands[] = $result; 00422 } 00423 00427 protected function newNumber( $text, $pos ) { 00428 return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) ); 00429 } 00430 00434 protected function newOperator( $type, $pos, $length ) { 00435 return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length ); 00436 } 00437 00441 protected function error( $message ) { 00442 throw new CLDRPluralRuleError( $message ); 00443 } 00444 } 00445 00450 class CLDRPluralRuleConverter_Fragment { 00451 public $parser, $pos, $length, $end; 00452 00453 function __construct( $parser, $pos, $length ) { 00454 $this->parser = $parser; 00455 $this->pos = $pos; 00456 $this->length = $length; 00457 $this->end = $pos + $length; 00458 } 00459 00460 public function error( $message ) { 00461 $text = $this->getText(); 00462 throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" ); 00463 } 00464 00465 public function getText() { 00466 return substr( $this->parser->rule, $this->pos, $this->length ); 00467 } 00468 } 00469 00476 class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment { 00477 public $type, $rpn; 00478 00479 function __construct( $parser, $type, $rpn, $pos, $length ) { 00480 parent::__construct( $parser, $pos, $length ); 00481 $this->type = $type; 00482 $this->rpn = $rpn; 00483 } 00484 00485 public function isType( $type ) { 00486 if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) { 00487 return true; 00488 } 00489 if ( $type === $this->type ) { 00490 return true; 00491 } 00492 return false; 00493 } 00494 } 00495 00501 class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment { 00502 public $name; 00503 00513 static $opTypes = array( 00514 'or' => 'bbb', 00515 'and' => 'bbb', 00516 'is' => 'nnb', 00517 'is-not' => 'nnb', 00518 'in' => 'nrb', 00519 'not-in' => 'nrb', 00520 'within' => 'nrb', 00521 'not-within' => 'nrb', 00522 'mod' => 'nnn', 00523 ',' => 'rrr', 00524 '..' => 'nnr', 00525 ); 00526 00530 static $typeSpecMap = array( 00531 'b' => 'boolean', 00532 'n' => 'number', 00533 'r' => 'range', 00534 ); 00535 00536 function __construct( $parser, $name, $pos, $length ) { 00537 parent::__construct( $parser, $pos, $length ); 00538 $this->name = $name; 00539 } 00540 00541 public function operate( $left, $right ) { 00542 $typeSpec = self::$opTypes[$this->name]; 00543 00544 $leftType = self::$typeSpecMap[$typeSpec[0]]; 00545 $rightType = self::$typeSpecMap[$typeSpec[1]]; 00546 $resultType = self::$typeSpecMap[$typeSpec[2]]; 00547 00548 $start = min( $this->pos, $left->pos, $right->pos ); 00549 $end = max( $this->end, $left->end, $right->end ); 00550 $length = $end - $start; 00551 00552 $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType, 00553 "{$left->rpn} {$right->rpn} {$this->name}", 00554 $start, $length ); 00555 00556 if ( !$left->isType( $leftType ) ) { 00557 $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" ); 00558 } 00559 00560 if ( !$right->isType( $rightType ) ) { 00561 $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" ); 00562 } 00563 return $newExpr; 00564 } 00565 } 00566 00571 class CLDRPluralRuleError extends MWException { 00572 function __construct( $message ) { 00573 parent::__construct( 'CLDR plural rule error: ' . $message ); 00574 } 00575 }