#!/usr/bin/env php
<?php

/**
 * Download a release of the Unicode CLDR data from GitHub and export the
 * English language/script/territory names and the cardinal plural rules
 * as JSON files.
 *
 * NOTE(review): in the reviewed copy of this file the text between the shebang
 * line and the first use of `$options->outputDir`, and the first line(s) of the
 * syntax heredoc in Options::showSyntax(), had been stripped. They have been
 * reconstructed from the surrounding code: please compare them with the
 * original (in particular, any top-level statement that preceded main()).
 */

/**
 * Run the whole import process.
 *
 * @param string[] $argv the command line arguments (program name included)
 *
 * @throws RuntimeException in case of invalid arguments, download/parsing errors, or I/O failures
 *
 * @return void
 */
function main(array $argv)
{
    $options = new Options($argv);
    if (!is_dir($options->outputDir) && !mkdir($options->outputDir, 0777, true)) {
        throw new RuntimeException("Cannot create output directory: {$options->outputDir}");
    }
    // realpath() returns false when the path can't be resolved: don't let it silently become ''
    $resolvedOutputDir = realpath($options->outputDir);
    if ($resolvedOutputDir === false) {
        throw new RuntimeException("Cannot resolve output directory: {$options->outputDir}");
    }
    $options->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', $resolvedOutputDir);
    $documentStorage = new DocumentStorage($options);
    echo 'Processing languages... ';
    $languages = new Languages($options, $documentStorage);
    echo "done.\n";
    echo 'Processing scripts... ';
    $scripts = new Scripts($options, $documentStorage);
    echo "done.\n";
    echo 'Processing territories... ';
    $territories = new Territories($options, $documentStorage);
    echo "done.\n";
    echo 'Processing plural rules... ';
    $plurals = new Plurals($options, $documentStorage, $languages);
    echo "done.\n";
    // Nothing is written until every source has been processed successfully
    echo 'Saving... ';
    $languages->save();
    $scripts->save();
    $territories->save();
    $plurals->save();
    echo "done.\n";
}

/**
 * The options of this command, parsed from the command line arguments.
 */
class Options
{
    /**
     * The CLDR version identifier (for example "47", "46.1", "47-beta2").
     *
     * @var string
     */
    public $cldrVersion;

    /**
     * The directory where the JSON files will be saved (with '/' as directory separator).
     *
     * @var string
     */
    public $outputDir;

    /**
     * Parse the command line arguments.
     * If -h/--help is specified, or if the number of arguments is wrong, the syntax is printed and the process exits.
     *
     * @param string[] $argv the command line arguments (program name included)
     *
     * @throws RuntimeException if the CLDR version identifier is malformed
     */
    public function __construct(array $argv)
    {
        if (array_intersect($argv, array('-h', '--help'))) {
            $this->showSyntax($argv[0], 0);
        }
        $this->outputDir = $this->getDefaultOutputDir();
        switch (count($argv)) {
            case 3:
                $this->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', $argv[2]);
                // no break
            case 2:
                $this->cldrVersion = $argv[1];
                // The D modifier makes $ match only at the very end (no trailing newline accepted)
                if (!preg_match('/^\d+(\.\d+)?(-(alpha|beta)\d+)?$/D', $this->cldrVersion)) {
                    throw new RuntimeException("{$this->cldrVersion} is not a valid CLDR version identifier");
                }
                break;
            default:
                $this->showSyntax($argv[0], 1);
        }
    }

    /**
     * Print the command syntax and terminate the process.
     *
     * @param string $programName
     * @param int $exitCode
     *
     * @return never
     */
    private function showSyntax($programName, $exitCode)
    {
        $programName = str_replace('/', DIRECTORY_SEPARATOR, $programName);
        $defaultOutputDir = str_replace('/', DIRECTORY_SEPARATOR, $this->getDefaultOutputDir());
        // NOTE(review): the first line and the exact layout of this text are reconstructed - confirm against the original
        echo <<<EOT
Syntax: {$programName} <cldr-version> [output-dir]

Arguments:
  cldr-version: the version of the CLDR data.
                Examples:
                  47
                  47-beta2
                  47-alpha1
                  46.1
                  46.1-beta1
  output-dir: the directory where the data will be written to
              Default: {$defaultOutputDir}

EOT;
        exit($exitCode);
    }

    /**
     * Get the default output directory: src/cldr-data under the parent of the directory containing this script.
     *
     * @return string
     */
    private function getDefaultOutputDir()
    {
        return str_replace(DIRECTORY_SEPARATOR, '/', dirname(__DIR__)) . '/src/cldr-data';
    }
}

/**
 * Download (and keep in memory) the XML documents of a specific CLDR release.
 */
class DocumentStorage
{
    /**
     * The URL prefix of every file of the requested CLDR release (without trailing slash).
     *
     * @var string
     */
    private $baseUrl;

    /**
     * The stream context used for the HTTP requests.
     *
     * @var resource
     */
    private $context;

    /**
     * The already parsed documents (keys are the paths passed to get()).
     *
     * @var array
     */
    private $cache;

    public function __construct(Options $options)
    {
        // Release tags use dashes instead of dots (for example: 46.1 => release-46-1)
        $this->baseUrl = 'https://raw.githubusercontent.com/unicode-org/cldr/refs/tags/release-' . str_replace('.', '-', $options->cldrVersion);
        $this->context = stream_context_create(array(
            'http' => array(
                'follow_location' => 1,
                'ignore_errors' => false,
            ),
        ));
        $this->cache = array();
    }

    /**
     * Get a parsed XML document, downloading it only the first time it's requested.
     *
     * @param string $path the path of the file, relative to the root of the CLDR repository
     *
     * @throws RuntimeException
     *
     * @return DOMDocument
     */
    public function get($path)
    {
        if (!isset($this->cache[$path])) {
            $this->cache[$path] = $this->loadXml($this->fetch($path));
        }

        return $this->cache[$path];
    }

    /**
     * Download the raw contents of a file.
     *
     * @param string $path
     *
     * @throws RuntimeException
     *
     * @return string
     */
    private function fetch($path)
    {
        $url = $this->baseUrl . '/' . ltrim($path, '/');
        // Silence the PHP warnings: failures are detected by checking the result
        set_error_handler(function () {}, -1);
        $content = file_get_contents($url, false, $this->context);
        restore_error_handler();
        if ($content === false) {
            $details = '';
            /** @var array $http_response_header */
            if (!empty($http_response_header)) {
                // The first header is the HTTP status line
                $details = " - {$http_response_header[0]}";
            }
            throw new RuntimeException("Failed to download from {$url}{$details}");
        }

        return $content;
    }

    /**
     * Parse an XML string, considering any libxml error/warning as a failure.
     *
     * @param string $xml
     *
     * @throws RuntimeException
     *
     * @return DOMDocument
     */
    private function loadXml($xml)
    {
        // DOMDocument::loadXML() throws a ValueError on PHP 8+ when the source is empty
        if ($xml === '') {
            throw new RuntimeException("Failed to parse XML:\nthe document is empty");
        }
        $doc = new DOMDocument();
        libxml_clear_errors();
        $restore = libxml_use_internal_errors(true);
        $loaded = $doc->loadXML($xml);
        $errors = libxml_get_errors();
        // Don't leave our errors in the libxml buffer
        libxml_clear_errors();
        libxml_use_internal_errors($restore);
        $lines = array();
        foreach ($errors as $error) {
            $lines[] = "{$error->message} at line {$error->line}";
        }
        if (!$loaded || $errors !== array()) {
            throw new RuntimeException("Failed to parse XML:\n" . implode("\n", $lines));
        }

        return $doc;
    }
}

/**
 * Base class of the importers: every importer reads one CLDR XML document and saves one JSON file.
 */
abstract class Processor
{
    /**
     * @var Options
     */
    protected $options;

    /**
     * The data to be saved, as returned by parse().
     *
     * @var array
     */
    protected $data;

    /**
     * @var DocumentStorage
     */
    private $documentStorage;

    /**
     * The path of the source XML document, relative to the root of the CLDR repository.
     *
     * @var string
     */
    private $path;

    /**
     * Fetch and parse the source document (nothing is written to disk until save() is called).
     *
     * @param string $path
     *
     * @throws RuntimeException
     */
    protected function __construct(Options $options, DocumentStorage $documentStorage, $path)
    {
        $this->options = $options;
        $this->documentStorage = $documentStorage;
        $this->path = ltrim($path, '/');
        $doc = $this->documentStorage->get($this->path);
        $this->data = $this->parse($doc);
    }

    /**
     * Save the parsed data as JSON in the output directory.
     *
     * @throws RuntimeException if the directory can't be created, or the data can't be encoded or written
     *
     * @return void
     */
    public function save()
    {
        $file = $this->getOutputFile();
        $dir = dirname($file);
        if (!is_dir($dir) && !mkdir($dir, 0777, true)) {
            throw new RuntimeException("Cannot create directory: {$dir}");
        }
        // Some of these flags are not available in older PHP versions
        $flags = 0;
        foreach (array('JSON_UNESCAPED_SLASHES', 'JSON_UNESCAPED_UNICODE', 'JSON_PRETTY_PRINT') as $flagName) {
            if (defined($flagName)) {
                $flags |= constant($flagName);
            }
        }
        // Encoding failures are reported as RuntimeException, which is what the caller handles
        $json = json_encode($this->data, $flags);
        if ($json === false) {
            $reason = function_exists('json_last_error_msg') ? json_last_error_msg() : ('error code ' . json_last_error());
            throw new RuntimeException("Failed to encode the data for file: {$file} ({$reason})");
        }
        if (file_put_contents($file, $json) === false) {
            throw new RuntimeException("Failed to write to file: {$file}");
        }
    }

    /**
     * Extract the data to be saved from the source document.
     *
     * @throws RuntimeException
     *
     * @return array
     */
    abstract protected function parse(DOMDocument $doc);

    /**
     * Sort an array by its keys, case-insensitively, with a key always placed before its "-alt-" variants
     * (for example: "az", then "az-alt-short", then "az-Arab").
     *
     * @return void
     */
    protected function sortByKeyWithPossiblyAlt(array &$data)
    {
        uksort($data, function ($a, $b) {
            // Numeric-looking keys are integers in PHP arrays
            $a = (string) $a;
            $b = (string) $b;
            // The comparison must be antisymmetric: cmp($a, $b) and cmp($b, $a) must have opposite signs
            if (strpos($b, "{$a}-alt-") === 0) {
                // $b is an alternative form of $a
                return -1;
            }
            if (strpos($a, "{$b}-alt-") === 0) {
                // $a is an alternative form of $b
                return 1;
            }

            return strcasecmp($a, $b);
        });
    }

    /**
     * Get the name of the JSON file, relative to the output directory.
     *
     * @return string
     */
    abstract protected function getOutputRelativeFileName();

    /**
     * Get the full path of the JSON file.
     *
     * @return string
     */
    private function getOutputFile()
    {
        return $this->options->outputDir . '/' . ltrim($this->getOutputRelativeFileName(), '/');
    }
}

/**
 * Import the cardinal plural rules from common/supplemental/plurals.xml.
 */
class Plurals extends Processor
{
    /**
     * Used to check that the locales of the plural rules are known languages.
     *
     * @var Languages
     */
    private $languages;

    public function __construct(Options $options, DocumentStorage $documentStorage, Languages $languages)
    {
        // Must be set before calling the parent constructor, since it invokes parse()
        $this->languages = $languages;
        parent::__construct($options, $documentStorage, 'common/supplemental/plurals.xml');
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::parse()
     */
    protected function parse(DOMDocument $doc)
    {
        $data = array();
        $xpath = new DOMXPath($doc);
        $xPluralRulesList = $xpath->query('/supplementalData/plurals[@type="cardinal"]/pluralRules');
        $definedLanguageIDs = $this->languages->getDefinedLanguageIDs();
        // Languages accepted even if they don't have an English name
        $knownMissingLanguages = array(
            'guw', // Gun
            'lld', // Dolomitic Ladin
            'hnj', // Hmong Njua
            'nah', // Nahuatl
            'smi', // Sami
        );
        // Locales to be renamed (or to be skipped, when the new name is empty)
        $replacements = array(
            'in' => 'id', // Former Indonesian
            'iw' => 'he', // Former Hebrew
            'jw' => 'jv', // Former Javanese
            'ji' => 'yi', // Former Yiddish
            'mo' => 'ro-MD', // former Moldavian
            'bh' => '', // Former Bihari: dismissed because it can be 'bho', 'mai' or 'mag'
            // Just a CLDR placeholder
            'root' => '',
        );
        $unrecognizedLocaleCodes = array();
        foreach ($xPluralRulesList as $xPluralRules) {
            $locales = preg_split('/\s+/', (string) $xPluralRules->getAttribute('locales'), -1, PREG_SPLIT_NO_EMPTY);
            if ($locales === false || $locales === array()) {
                throw new RuntimeException('No locales found in pluralRules element');
            }
            $elements = $this->extractRules($xPluralRules);
            foreach ($locales as $locale) {
                $locale = str_replace('_', '-', $locale);
                $overwrite = true;
                // The target of a replacement may already be there because of its former code
                if (isset($data[$locale]) && array_search($locale, $replacements, true) === false) {
                    throw new RuntimeException("Duplicate locale: {$locale}");
                }
                if (!in_array($locale, $definedLanguageIDs, true) && !in_array($locale, $knownMissingLanguages, true)) {
                    if (!isset($replacements[$locale])) {
                        $unrecognizedLocaleCodes[] = $locale;
                        continue;
                    }
                    $locale = $replacements[$locale];
                    if ($locale === '') {
                        continue;
                    }
                    // The rules of a former code must never replace the ones of the current code
                    $overwrite = false;
                }
                if ($overwrite || !isset($data[$locale])) {
                    $data[$locale] = $elements;
                }
            }
        }
        if ($unrecognizedLocaleCodes !== array()) {
            throw new RuntimeException("The following locales are not defined:\n- " . implode("\n- ", $unrecognizedLocaleCodes));
        }
        if ($data === array()) {
            throw new RuntimeException('No plural rules found');
        }
        $this->sortByKeyWithPossiblyAlt($data);

        return array(
            'supplemental' => array(
                'version' => array(
                    '_cldrVersion' => $this->options->cldrVersion,
                ),
                'plurals-type-cardinal' => $data,
            ),
        );
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'supplemental/plurals.json';
    }

    /**
     * Extract the rules defined in a pluralRules element, in the zero/one/two/few/many/other order.
     *
     * @throws RuntimeException if the element has unexpected children, unknown or duplicated counts, or no rules at all
     *
     * @return array keys are "pluralRule-count-<count>", values are the rule definitions
     */
    private function extractRules(DOMElement $xPluralRules)
    {
        // Defines both the accepted counts and their order in the result
        $elements = array(
            'pluralRule-count-zero' => null,
            'pluralRule-count-one' => null,
            'pluralRule-count-two' => null,
            'pluralRule-count-few' => null,
            'pluralRule-count-many' => null,
            'pluralRule-count-other' => null,
        );
        foreach ($xPluralRules->childNodes as $xPluralRule) {
            if (!$xPluralRule instanceof DOMElement) {
                // Text nodes, comments, ...
                continue;
            }
            if ($xPluralRule->tagName !== 'pluralRule') {
                throw new RuntimeException("Unexpected element: {$xPluralRule->tagName}");
            }
            $count = (string) $xPluralRule->getAttribute('count');
            if ($count === '') {
                throw new RuntimeException('Missing count attribute');
            }
            $key = "pluralRule-count-{$count}";
            if (!array_key_exists($key, $elements)) {
                throw new RuntimeException("Unknown count: {$count}");
            }
            if ($elements[$key] !== null) {
                throw new RuntimeException("Duplicate count: {$count}");
            }
            $elements[$key] = $xPluralRule->textContent;
        }
        $elements = array_filter($elements, function ($value) {
            return $value !== null;
        });
        if ($elements === array()) {
            throw new RuntimeException('No plural rules found');
        }

        return $elements;
    }
}

/**
 * Base class of the importers of the English display names defined in common/main/en.xml.
 */
abstract class LocaleDisplayName extends Processor
{
    public function __construct(Options $options, DocumentStorage $documentStorage)
    {
        parent::__construct($options, $documentStorage, 'common/main/en.xml');
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::parse()
     */
    protected function parse(DOMDocument $doc)
    {
        $data = array();
        $xpath = new DOMXPath($doc);
        $xElementList = $xpath->query($this->getXPathSelector());
        foreach ($xElementList as $xElement) {
            $type = (string) $xElement->getAttribute('type');
            if ($type === '') {
                throw new RuntimeException('Missing type attribute');
            }
            $key = str_replace('_', '-', $type);
            // Alternative names are stored with a "<type>-alt-<alt>" key
            $alt = (string) $xElement->getAttribute('alt');
            if ($alt !== '') {
                $key = "{$key}-alt-{$alt}";
            }
            if (isset($data[$key])) {
                throw new RuntimeException("Duplicate key: {$key}");
            }
            $data[$key] = (string) $xElement->textContent;
        }
        if ($data === array()) {
            throw new RuntimeException('No elements found');
        }
        $this->sortByKeyWithPossiblyAlt($data);

        return array(
            'main' => array(
                'en-US' => array(
                    'identity' => array(
                        'version' => array(
                            '_cldrVersion' => $this->options->cldrVersion,
                        ),
                        'language' => 'en',
                        'territory' => 'US',
                    ),
                    'localeDisplayNames' => array(
                        $this->getExportedNodeName() => $data,
                    ),
                ),
            ),
        );
    }

    /**
     * Get the XPath expression that selects the elements to be imported.
     *
     * @return string
     */
    abstract protected function getXPathSelector();

    /**
     * Get the key (under "localeDisplayNames") that will contain the imported names.
     *
     * @return string
     */
    abstract protected function getExportedNodeName();
}

/**
 * Import the English names of the languages.
 */
class Languages extends LocaleDisplayName
{
    /**
     * Get the identifiers of the languages that have a name (alternative names are not included).
     *
     * @return string[]
     */
    public function getDefinedLanguageIDs()
    {
        $names = $this->data['main']['en-US']['localeDisplayNames'][$this->getExportedNodeName()];

        return array_values(array_filter(
            array_keys($names),
            function ($key) {
                return strpos((string) $key, '-alt-') === false;
            }
        ));
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/languages/language';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'languages';
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/languages.json';
    }
}

/**
 * Import the English names of the scripts.
 */
class Scripts extends LocaleDisplayName
{
    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/scripts/script';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'scripts';
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/scripts.json';
    }
}

/**
 * Import the English names of the territories.
 */
class Territories extends LocaleDisplayName
{
    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/territories/territory';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'territories';
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/territories.json';
    }
}

try {
    main($argv);
} catch (RuntimeException $e) {
    fwrite(STDERR, $e->getMessage() . "\n");
    exit(1);
}