<?php
/**
 * Command-line front ends for the language checks.
 *
 * CheckLanguageCLI runs the checks through a `languages` object;
 * CheckExtensionsCLI runs the message checks once per extension message
 * group through `extensionLanguages` objects.
 * The per-language check blacklist ($checkBlacklist) is defined at the
 * bottom of this file.
 */

class CheckLanguageCLI {
	/** Language code to check (used when not checking all languages) */
	protected $code  = null;
	/** Display level, 0-3; see help() for the meaning of each value */
	protected $level = 2;
	/** Whether message keys are printed as wiki links */
	protected $doLinks = false;
	/** Prefix inserted before "MediaWiki:" in generated links */
	protected $linksPrefix = '';
	/** Language code for which links get no "/code" subpage suffix */
	protected $wikiCode = 'en';
	/** Whether every language returned by getLanguages() is checked */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki' */
	protected $output = 'plain';
	/** Names of the checks to run */
	protected $checks = array();
	/** Object the check methods are called on */
	protected $L = null;

	/** Check results, as array( code => array( check => messages ) ) */
	protected $results = array();

	/** Passed to the `languages` constructor; false when --noexif is given */
	private $includeExif = false;

	/**
	 * Read the command line options and create the `languages` object.
	 * Prints the help text and exits when the 'help' option is set.
	 *
	 * @param $options Array: options parsed from the command line.
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit(1);
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# Normalise to an integer so that the strict "=== 0" test in
			# checkLanguage() also matches a level given as a string.
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['prefix'] ) ) {
			$this->linksPrefix = $options['prefix'];
		}

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		$this->checks = $this->resolveChecks( $options );

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Work out which checks to run from the 'whitelist', 'blacklist' and
	 * 'easy' options. A whitelist wins over everything else; a blacklist is
	 * subtracted from the easy checks if 'easy' is set, otherwise from the
	 * default checks.
	 *
	 * @param $options Array: options parsed from the command line.
	 * @return Array of check names.
	 */
	protected function resolveChecks( array $options ) {
		if ( isset( $options['whitelist'] ) ) {
			return explode( ',', $options['whitelist'] );
		}

		$base = isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks();
		if ( isset( $options['blacklist'] ) ) {
			return array_diff( $base, explode( ',', $options['blacklist'] ) );
		}

		return $base;
	}

	/**
	 * Get the checks that are run when no check-selection option is given.
	 * @return Array of check names.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Get the checks that do not work on messages (namespaces, magic words,
	 * special pages). Their keys are never turned into links, and they are
	 * left out of the wiki output table.
	 * @return Array of check names.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Get the checks selected by the 'easy' option.
	 * @return Array of check names.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Get the map from check name to the method called on $this->L.
	 * @return Array of check name => method name.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithMismatchVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-old'    => 'getObsoleteMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			# The method name is spelled this way on purpose: it has to match
			# the name declared by the object in $this->L.
			'special'      => 'getUntraslatedSpecialPages',
			'special-old'  => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Get, for each non-message check, how outputText() obtains the total
	 * ($2) shown in its description: array( method on $this->L, code ),
	 * where a null code means "the language being checked". A null entry
	 * means no total is computed for that check.
	 * @return Array of check name => array( method, code ) or null.
	 */
	protected function getTotalCount() {
		return array(
			'namespace'   => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'magic'       => array( 'getMagicWords', 'en' ),
			'magic-old'   => array( 'getMagicWords', null ),
			'magic-over'  => array( 'getMagicWords', null ),
			'magic-case'  => array( 'getMagicWords', null ),
			'special'     => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Get the header line printed for each check by outputText().
	 * $1 is the number of problems, $2 the total and $3 the language code.
	 * @return Array of check name => description.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t match the variables used in en:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old'    => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old'  => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Get the help text printed for the 'help' option.
	 * @return String: the help text.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
	* lang: Language code (default: the installation default language).
	* all: Check all customized languages.
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* prefix: prefix to add to links.
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Don't do the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
	* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable or do not exist, but are translated.
	* variables: Messages without variables which should be used, or with variables which shouldn't be used.
	* empty: Empty messages and messages that contain only -.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
	* namespace: Namespace names that were not translated.
	* projecttalk: Namespace names and aliases where the project talk does not contain $1.
	* magic: Magic words that were not translated.
	* magic-old: Magic words which do not exist.
	* magic-over: Magic words that override the original English word.
	* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
	* special: Special page names that were not translated.
	* special-old: Special page names which do not exist.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks and, unless the level is 0, print the results.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			$this->outputResults();
		}
	}

	/**
	 * Print $this->results in the format selected by the 'output' option.
	 * @throws MWException if the output type is neither 'plain' nor 'wiki'.
	 */
	protected function outputResults() {
		switch ( $this->output ) {
			case 'plain':
				$this->outputText();
				break;
			case 'wiki':
				$this->outputWiki();
				break;
			default:
				throw new MWException( "Invalid output type $this->output" );
		}
	}

	/**
	 * Fill $this->results, either for every language returned by
	 * $this->L->getLanguages() (except the ignored codes) or for the single
	 * selected language.
	 * @throws MWException if the single selected language is an ignored code.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * Get the per-language check blacklist from the $checkBlacklist global.
	 * @return Array of language code => array of check names.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Run the selected checks for one language.
	 *
	 * @param $code String: the language code.
	 * @return Array of check name => result of the check method. Blacklisted
	 *   checks yield an empty array; at level 0 the whole result is empty.
	 * @throws MWException if a selected check is unknown or not callable.
	 */
	protected function checkLanguage( $code ) {
		# Syntax check only
		if ( $this->level === 0 ) {
			$this->L->getMessages( $code );
			# Return an array, not null, so that callers can iterate the result
			return array();
		}

		$results = array();
		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		foreach ( $this->checks as $check ) {
			if ( isset( $checkBlacklist[$code] ) &&
				in_array( $check, $checkBlacklist[$code] ) ) {
				$results[$check] = array();
				continue;
			}

			# A name that is not in the map (e.g. a typo in the whitelist)
			# gives a null callback, which is_callable() rejects below.
			$callback = isset( $checkFunctions[$check] )
				? array( $this->L, $checkFunctions[$check] )
				: null;
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key for output: unchanged unless links are enabled,
	 * otherwise a wiki link to the MediaWiki: page of the message (with a
	 * "/code" suffix when $code differs from the wiki language).
	 *
	 * @param $key String: the message key.
	 * @param $code String: the language code.
	 * @return String: the formatted key.
	 */
	protected function formatKey( $key, $code ) {
		if ( !$this->doLinks ) {
			return $key;
		}

		$displayKey = ucfirst( $key );
		if ( $code == $this->wikiCode ) {
			return "[[{$this->linksPrefix}MediaWiki:$displayKey|$key]]";
		}
		return "[[{$this->linksPrefix}MediaWiki:$displayKey/$code|$key]]";
	}

	/**
	 * Print the results as plain text: one header per check that found
	 * problems, followed (above level 1) by the list of keys, with values
	 * at level 3.
	 */
	protected function outputText() {
		# These do not depend on the language or the check
		$descriptions = $this->getDescriptions();
		$nonMessageChecks = $this->nonMessageChecks();
		$totalSources = $this->getTotalCount();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$isNonMessage = in_array( $check, $nonMessageChecks );
				if ( $check == 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( $isNonMessage ) {
					$source = isset( $totalSources[$check] ) ? $totalSources[$check] : null;
					if ( $source === null ) {
						# No total for this check ('projecttalk'); its
						# description has no $2 placeholder, so the value
						# is never shown.
						$total = 0;
					} else {
						$callback = array( $this->L, $source[0] );
						$callCode = $source[1] ? $source[1] : $code;
						$total = count( call_user_func( $callback, $callCode ) );
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					if ( !$isNonMessage ) {
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Print the results as wikitext: a sortable table with one row per
	 * language that has problems, followed by one section per language
	 * listing the affected keys. Non-message checks are left out.
	 */
	public function outputWiki() {
		global $wgContLang;

		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		$rows = array(
			'! Language !! Code !! Total !! ' . implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) ),
		);

		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( array_keys( $messages ) as $key ) {
						$messageDetails[] = $this->formatKey( $key, $code );
					}
					$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( 'nodb' );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check whether $this->results contains no problem at all.
	 * @return Boolean: true if every check of every language is empty.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $results ) {
			foreach ( $results as $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}

class CheckExtensionsCLI extends CheckLanguageCLI {
	/** Array of extensionLanguages objects, one per extension to check */
	private $extensions;

	/**
	 * Read the command line options and build the list of extensions.
	 * Prints the help text and exits when the 'help' option is set.
	 * The parent constructor is deliberately not called: no `languages`
	 * object is created here, $this->L is set per extension instead.
	 *
	 * @param $options Array: options parsed from the command line.
	 * @param $extension String: an extension name, several names separated
	 *   by commas, or one of 'all', 'wikimedia' and 'flaggedrevs'.
	 */
	public function __construct( array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit(1);
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# Normalise to an integer so that the strict "=== 0" test in
			# the parent's checkLanguage() also matches a string level.
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		$this->checks = $this->resolveChecks( $options );

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		$premade = new PremadeMediawikiExtensionGroups();
		$premade->addAll();
		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} elseif ( $extension == 'flaggedrevs' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-flaggedrevs-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} else {
			foreach ( explode( ',', $extension ) as $name ) {
				$group = MessageGroups::getGroup( 'ext-' . $name );
				if ( $group ) {
					$this->extensions[] = new extensionLanguages( $group );
				} else {
					print "No such extension $name.\n";
				}
			}
		}
	}

	/**
	 * Get the checks that are run when no check-selection option is given.
	 * @return Array of check names.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get the non-message checks; there are none for extensions.
	 * @return Array: always empty.
	 */
	protected function nonMessageChecks() {
		return array();
	}

	/**
	 * Get the checks selected by the 'easy' option.
	 * @return Array of check names.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Get the help text printed for the 'help' option.
	 * @return String: the help text.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
	* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions, "wikimedia" for extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs extension messages.
	* lang: Language code (default: the installation default language).
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Do not perform the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable, but translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks. Output is produced per extension by checkLanguage(),
	 * so nothing is printed here.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check one language in every selected extension and print the results
	 * of each extension that has problems, preceded by the extension name.
	 * $this->L and $this->results are overwritten for each extension.
	 * Nothing is returned.
	 *
	 * @param $code String: the language code.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			$this->L = $extension;
			$this->results = array();
			$this->results[$code] = parent::checkLanguage( $code );

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";

			if ( $this->level > 0 ) {
				$this->outputResults();
			}

			echo "\n";
		}
	}
}

# Blacklist some checks for some languages
$checkBlacklist = array(
#'code'        => array( 'check1', 'check2' ... )
'az'           => array( 'plural' ),
'bo'           => array( 'plural' ),
'dz'           => array( 'plural' ),
'id'           => array( 'plural' ),
'fa'           => array( 'plural' ),
'gan'          => array( 'plural' ),
'gan-hans'     => array( 'plural' ),
'gan-hant'     => array( 'plural' ),
'gn'           => array( 'plural' ),
'hak'          => array( 'plural' ),
'hu'           => array( 'plural' ),
'ja'           => array( 'plural' ), // Does not use plural
'jv'           => array( 'plural' ),
'ka'           => array( 'plural' ),
'kk-arab'      => array( 'plural' ),
'kk-cyrl'      => array( 'plural' ),
'kk-latn'      => array( 'plural' ),
'km'           => array( 'plural' ),
'kn'           => array( 'plural' ),
'ko'           => array( 'plural' ),
'lzh'          => array( 'plural' ),
'mn'           => array( 'plural' ),
'ms'           => array( 'plural' ),
'my'           => array( 'plural', 'chars' ),  // Uses a lot zwnj
'sah'          => array( 'plural' ),
'sq'           => array( 'plural' ),
'tet'          => array( 'plural' ),
'th'           => array( 'plural' ),
'to'           => array( 'plural' ),
'tr'           => array( 'plural' ),
'vi'           => array( 'plural' ),
'wuu'          => array( 'plural' ),
'xmf'          => array( 'plural' ),
'yo'           => array( 'plural' ),
'yue'          => array( 'plural' ),
'zh'           => array( 'plural' ),
'zh-classical' => array( 'plural' ),
'zh-cn'        => array( 'plural' ),
'zh-hans'      => array( 'plural' ),
'zh-hant'      => array( 'plural' ),
'zh-hk'        => array( 'plural' ),
'zh-sg'        => array( 'plural' ),
'zh-tw'        => array( 'plural' ),
'zh-yue'       => array( 'plural' ),
);