// blob: 0f10355bc810c24ff3c8cca9195dcb65214050a0 [file] [log] [blame]
// Copyright (C) 2020 André Bargull. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-intl.getcanonicallocales
description: >
Assert non-simple region subtag replacements work as expected.
info: |
8.2.1 Intl.getCanonicalLocales (locales)
1. Let ll be ? CanonicalizeLocaleList(locales).
2. Return CreateArrayFromList(ll).
9.2.1 CanonicalizeLocaleList (locales)
...
7. Repeat, while k < len
...
c. If kPresent is true, then
...
vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag).
...
UTS 35, §3.2.1 Canonical Unicode Locale Identifiers
- Replace aliases in the unicode_language_id and tlang (if any) using the following process:
- If the region subtag matches the type attribute of a territoryAlias element in
Supplemental Data, replace the region subtag with the replacement value, as follows:
1. If there is a single territory in the replacement, use it.
2. If there are multiple territories:
1. Look up the most likely territory for the base language code (and script, if there is one).
2. If that likely territory is in the list, use it.
3. Otherwise, use the first territory in the list.
includes: [testIntl.js]
---*/
// CLDR contains region mappings where the replacement region depends on the
// likely subtags from the language and script subtags.
const testData = {
  // Soviet Union ("SU" / "810"): after its breakup the region code maps to
  // Russia ("RU"), Armenia ("AM"), or one of several other successors. Which
  // one is picked depends on the language and script subtags, whether they
  // are given explicitly or only implied as likely subtags:
  //
  //   <territoryAlias type="SU" replacement="RU AM AZ BY EE GE KZ KG LV LT MD TJ TM UA UZ" reason="deprecated"/>
  //   <territoryAlias type="810" replacement="RU AM AZ BY EE GE KZ KG LV LT MD TJ TM UA UZ" reason="overlong"/>
  "ru-SU": "ru-RU",
  "ru-810": "ru-RU",
  "en-SU": "en-RU",
  "en-810": "en-RU",
  "und-SU": "und-RU",
  "und-810": "und-RU",
  "und-Latn-SU": "und-Latn-RU",
  "und-Latn-810": "und-Latn-RU",

  // Armenia becomes the preferred region once the language is "hy" (Armenian)
  // or the script is "Armn" (Armenian):
  //
  //   <likelySubtag from="hy" to="hy_Armn_AM"/>
  //   <likelySubtag from="und_Armn" to="hy_Armn_AM"/>
  "hy-SU": "hy-AM",
  "hy-810": "hy-AM",
  "und-Armn-SU": "und-Armn-AM",
  "und-Armn-810": "und-Armn-AM",

  // Serbia and Montenegro ("CS"):
  //
  //   <territoryAlias type="CS" replacement="RS ME" reason="deprecated"/>
  //
  // Likely-subtags entries that mention "RS" or "ME":
  //
  //   <likelySubtag from="sr" to="sr_Cyrl_RS"/>
  //   <likelySubtag from="sr_ME" to="sr_Latn_ME"/>
  //   <likelySubtag from="und_RS" to="sr_Cyrl_RS"/>
  //   <likelySubtag from="und_ME" to="sr_Latn_ME"/>
  //
  // No language/script combination lacking a region subtag ever resolves to
  // "ME", so "RS" is always the replacement.
  "sr-CS": "sr-RS",
  "sr-Latn-CS": "sr-Latn-RS",
  "sr-Cyrl-CS": "sr-Cyrl-RS",

  // Neutral Zone ("NT"): the region already present in the source locale
  // identifier plays no part in choosing the likely replacement region.
  // Consider "az-NT", i.e. Azerbaijani as spoken in the Neutral Zone. "NT" is
  // replaced by either "SA" (Saudi Arabia) or "IQ" (Iraq), and a likely-subtags
  // entry for "az-IQ" exists. However, looking only at the language subtag,
  // "az" always resolves to "az-Latn-AZ"; since "AZ" is not a member of
  // ["SA", "IQ"], the default replacement for "NT" -- the first entry, "SA" --
  // is used. Hence "az-NT" canonicalises to "az-SA" rather than "az-IQ", even
  // though the latter may be the more sensible candidate judging by actual
  // usage of the target locales.
  //
  //   <territoryAlias type="NT" replacement="SA IQ" reason="deprecated"/>
  //   <likelySubtag from="az_IQ" to="az_Arab_IQ"/>
  //   <likelySubtag from="az" to="az_Latn_AZ"/>
  "az-NT": "az-SA",
};
// Check every entry of testData: canonicalizing the key (a tag whose region
// subtag is a deprecated/overlong alias) must yield exactly the mapped value.
for (const [tag, canonical] of Object.entries(testData)) {
  // Make sure the test data is correct: the expected value must itself already
  // be a canonicalized, structurally valid tag according to the harness helper
  // (presumably supplied by the included testIntl.js).
  assert(
    isCanonicalizedStructurallyValidLanguageTag(canonical),
    `"${canonical}" is a canonicalized and structurally valid language tag.`
  );

  const result = Intl.getCanonicalLocales(tag);

  // Include the input tag in each message so that a failure in this
  // data-driven loop identifies which entry was being checked.
  assert.sameValue(
    result.length,
    1,
    `Intl.getCanonicalLocales("${tag}") returns an array with a single element`
  );
  assert.sameValue(
    result[0],
    canonical,
    `Canonicalized form of "${tag}"`
  );
}