JSTests/test262/test/intl402/Locale/likely-subtags-grandfathered.js - WebKit - Git at Google

 // Copyright 2018 André Bargull; Igalia, S.L. All rights reserved.
 // This code is governed by the BSD license found in the LICENSE file.

 /*---
 esid: sec-intl.locale
 description: >
     Verifies canonicalization, minimization and maximization of specific tags.
 info: |
     ApplyOptionsToTag( tag, options )

     2. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.

     9. Set tag to CanonicalizeLanguageTag(tag).

     CanonicalizeLanguageTag( tag )

     The CanonicalizeLanguageTag abstract operation returns the canonical and
     case-regularized form of the locale argument (which must be a String value
     that is a structurally valid Unicode BCP 47 Locale Identifier as verified by
     the IsStructurallyValidLanguageTag abstract operation).

     IsStructurallyValidLanguageTag ( locale )

     The IsStructurallyValidLanguageTag abstract operation verifies that the
     locale argument (which must be a String value)

     represents a well-formed Unicode BCP 47 Locale Identifier" as specified in
     Unicode Technical Standard 35 section 3.2, or successor,


     Intl.Locale.prototype.maximize ()
     3. Let maximal be the result of the Add Likely Subtags algorithm applied to loc.[[Locale]].

     Intl.Locale.prototype.minimize ()
     3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]].
 features: [Intl.Locale]
 ---*/

 const irregularGrandfathered = [
     "en-GB-oed",
     "i-ami",
     "i-bnn",
     "i-default",
     "i-enochian",
     "i-hak",
     "i-klingon",
     "i-lux",
     "i-mingo",
     "i-navajo",
     "i-pwn",
     "i-tao",
     "i-tay",
     "i-tsu",
     "sgn-BE-FR",
     "sgn-BE-NL",
     "sgn-CH-DE",
 ];

 for (const tag of irregularGrandfathered) {
     assert.throws(RangeError, () => new Intl.Locale(tag));
 }

 const regularGrandfathered = [
     {
         tag: "art-lojban",
         canonical: "jbo",
         maximized: "jbo-Latn-001",
     },
     {
         tag: "zh-guoyu",
         canonical: "zh",
         maximized: "zh-Hans-CN",
     },
     {
         tag: "zh-hakka",
         canonical: "hak",
         maximized: "hak-Hans-CN",
     },
     {
         tag: "zh-xiang",
         canonical: "hsn",
         maximized: "hsn-Hans-CN",
     },
 ];

 for (const {tag, canonical, maximized = canonical, minimized = canonical} of regularGrandfathered) {
     const loc = new Intl.Locale(tag);
     assert.sameValue(loc.toString(), canonical);

     assert.sameValue(loc.maximize().toString(), maximized);
     assert.sameValue(loc.maximize().maximize().toString(), maximized);

     assert.sameValue(loc.minimize().toString(), minimized);
     assert.sameValue(loc.minimize().minimize().toString(), minimized);

     assert.sameValue(loc.maximize().minimize().toString(), minimized);
     assert.sameValue(loc.minimize().maximize().toString(), maximized);
 }

 const regularGrandfatheredWithExtLang = [
     "no-bok",
     "no-nyn",
     "zh-min",
     "zh-min-nan",
 ];

 for (const tag of regularGrandfatheredWithExtLang) {
     assert.throws(RangeError, () => new Intl.Locale(tag));
 }

 // Add variants, extensions, and privateuse subtags to regular grandfathered
 // language tags and ensure it produces the "expected" result.
 const extras = [
     "fonipa",
     "a-not-assigned",
     "u-attr",
     "u-co",
     "u-co-phonebk",
     "x-private",
 ];

 for (const {tag} of regularGrandfathered) {
     const priv = "-x-0";
     const tagMax = new Intl.Locale(tag + priv).maximize().toString().slice(0, -priv.length);
     const tagMin = new Intl.Locale(tag + priv).minimize().toString().slice(0, -priv.length);

     for (const extra of extras) {
         const loc = new Intl.Locale(tag + "-" + extra);

         let canonical = tag + "-" + extra;
         let canonicalMax = tagMax + "-" + extra;
         let canonicalMin = tagMin + "-" + extra;

         // Ensure the added variant subtag is correctly sorted in the canonical tag.
         if (/^[a-z0-9]{5,8}|[0-9][a-z0-9]{3}$/i.test(extra)) {
             const sorted = s => s.replace(/(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+$/i,
                                           m => m.split("-").sort().join("-"));
             canonical = sorted(canonical);
             canonicalMax = sorted(canonicalMax);
             canonicalMin = sorted(canonicalMin);
         }

         // Adding extra subtags to grandfathered tags can have "interesting" results. Take for
         // example "art-lojban" when "fonipa" is added, so we get "art-lojban-fonipa". The first
         // step when canonicalising the language tag is to bring it in 'canonical syntax', that
         // means among other things sorting variants in alphabetical order. So "art-lojban-fonipa"
         // is transformed to "art-fonipa-lojban", because "fonipa" is sorted before "lojban". And
         // only after that has happened, we replace aliases with their preferred form.
         //
         // Now the usual problems arise when doing silly things like adding subtags to
         // grandfathered subtags, nobody, neither RFC 5646 nor UTS 35, provides a clear description
         // what needs to happen next.
         //
         // From <http://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier>:
         //
         // > If the BCP 47 primary language subtag matches the type attribute of a languageAlias
         // > element in Supplemental Data, replace the language subtag with the replacement value.
         // >  1. ...
         // >  2. Five special deprecated grandfathered codes (such as i-default) are in type
         //       attributes, and are also replaced.
         // >  3. ...
         //
         // So let's assume grandfathered tags are treated as 'primary language subtag' if and only
         // if no additional subtags are present. Because in all other cases, we don't really have a
         // grandfathered tag, but only some arbitrary combination of random subtags.
         //
         // Basically what we expect here is that only grandfathered without any additional subtags
         // are canonicalised to their modern form and in all other cases they're left as is.
         //
         // Not all language tag processor will pass this test, for example because they don't order
         // variant subtags in alphabetical order or they're too eager when detecting grandfathered
         // tags. For example "zh-hakka-hakka" is accepted in some language tag processors, because
         // the language tag starts with a prefix which matches a grandfathered tag, and that prefix
         // is then canonicalised to "hak" and the second "hakka" is simply appended to it, so the
         // resulting tag is "hak-hakka". This is clearly wrong as far as ECMA-402 compliance is
         // concerned, because language tags are parsed and validated before any canonicalisation
         // happens. And during the validation step an error should be emitted, because the input
         // "zh-hakka-hakka" contains two identical variant subtags.
         //
         // From <https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag>:
         //
         // > does not include duplicate variant subtags
         //
         // So, if your implementation fails this assertion, but you still like to test the rest of
         // this file, a pull request to split this file seems the way to go!
         assert.sameValue(loc.toString(), canonical);

         assert.sameValue(loc.maximize().toString(), canonicalMax);
         assert.sameValue(loc.maximize().maximize().toString(), canonicalMax);

         assert.sameValue(loc.minimize().toString(), canonicalMin);
         assert.sameValue(loc.minimize().minimize().toString(), canonicalMin);

         assert.sameValue(loc.maximize().minimize().toString(), canonicalMin);
         assert.sameValue(loc.minimize().maximize().toString(), canonicalMax);
     }
 }
	// Copyright 2018 André Bargull; Igalia, S.L. All rights reserved.
	// This code is governed by the BSD license found in the LICENSE file.

	/*---
	esid: sec-intl.locale
	description: >
	Verifies canonicalization, minimization and maximization of specific tags.
	info: \|
	ApplyOptionsToTag( tag, options )

	2. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception.

	9. Set tag to CanonicalizeLanguageTag(tag).

	CanonicalizeLanguageTag( tag )

	The CanonicalizeLanguageTag abstract operation returns the canonical and
	case-regularized form of the locale argument (which must be a String value
	that is a structurally valid Unicode BCP 47 Locale Identifier as verified by
	the IsStructurallyValidLanguageTag abstract operation).

	IsStructurallyValidLanguageTag ( locale )

	The IsStructurallyValidLanguageTag abstract operation verifies that the
	locale argument (which must be a String value)

	represents a well-formed Unicode BCP 47 Locale Identifier" as specified in
	Unicode Technical Standard 35 section 3.2, or successor,


	Intl.Locale.prototype.maximize ()
	3. Let maximal be the result of the Add Likely Subtags algorithm applied to loc.[[Locale]].

	Intl.Locale.prototype.minimize ()
	3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]].
	features: [Intl.Locale]
	---*/

	const irregularGrandfathered = [
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-hak",
	"i-klingon",
	"i-lux",
	"i-mingo",
	"i-navajo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-FR",
	"sgn-BE-NL",
	"sgn-CH-DE",
	];

	for (const tag of irregularGrandfathered) {
	assert.throws(RangeError, () => new Intl.Locale(tag));
	}

	const regularGrandfathered = [
	{
	tag: "art-lojban",
	canonical: "jbo",
	maximized: "jbo-Latn-001",
	},
	{
	tag: "zh-guoyu",
	canonical: "zh",
	maximized: "zh-Hans-CN",
	},
	{
	tag: "zh-hakka",
	canonical: "hak",
	maximized: "hak-Hans-CN",
	},
	{
	tag: "zh-xiang",
	canonical: "hsn",
	maximized: "hsn-Hans-CN",
	},
	];

	for (const {tag, canonical, maximized = canonical, minimized = canonical} of regularGrandfathered) {
	const loc = new Intl.Locale(tag);
	assert.sameValue(loc.toString(), canonical);

	assert.sameValue(loc.maximize().toString(), maximized);
	assert.sameValue(loc.maximize().maximize().toString(), maximized);

	assert.sameValue(loc.minimize().toString(), minimized);
	assert.sameValue(loc.minimize().minimize().toString(), minimized);

	assert.sameValue(loc.maximize().minimize().toString(), minimized);
	assert.sameValue(loc.minimize().maximize().toString(), maximized);
	}

	const regularGrandfatheredWithExtLang = [
	"no-bok",
	"no-nyn",
	"zh-min",
	"zh-min-nan",
	];

	for (const tag of regularGrandfatheredWithExtLang) {
	assert.throws(RangeError, () => new Intl.Locale(tag));
	}

	// Add variants, extensions, and privateuse subtags to regular grandfathered
	// language tags and ensure it produces the "expected" result.
	const extras = [
	"fonipa",
	"a-not-assigned",
	"u-attr",
	"u-co",
	"u-co-phonebk",
	"x-private",
	];

	for (const {tag} of regularGrandfathered) {
	const priv = "-x-0";
	const tagMax = new Intl.Locale(tag + priv).maximize().toString().slice(0, -priv.length);
	const tagMin = new Intl.Locale(tag + priv).minimize().toString().slice(0, -priv.length);

	for (const extra of extras) {
	const loc = new Intl.Locale(tag + "-" + extra);

	let canonical = tag + "-" + extra;
	let canonicalMax = tagMax + "-" + extra;
	let canonicalMin = tagMin + "-" + extra;

	// Ensure the added variant subtag is correctly sorted in the canonical tag.
	if (/^[a-z0-9]{5,8}\|[0-9][a-z0-9]{3}$/i.test(extra)) {
	const sorted = s => s.replace(/(-([a-z0-9]{5,8}\|[0-9][a-z0-9]{3}))+$/i,
	m => m.split("-").sort().join("-"));
	canonical = sorted(canonical);
	canonicalMax = sorted(canonicalMax);
	canonicalMin = sorted(canonicalMin);
	}

	// Adding extra subtags to grandfathered tags can have "interesting" results. Take for
	// example "art-lojban" when "fonipa" is added, so we get "art-lojban-fonipa". The first
	// step when canonicalising the language tag is to bring it in 'canonical syntax', that
	// means among other things sorting variants in alphabetical order. So "art-lojban-fonipa"
	// is transformed to "art-fonipa-lojban", because "fonipa" is sorted before "lojban". And
	// only after that has happened, we replace aliases with their preferred form.
	//
	// Now the usual problems arise when doing silly things like adding subtags to
	// grandfathered subtags, nobody, neither RFC 5646 nor UTS 35, provides a clear description
	// what needs to happen next.
	//
	// From <http://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier>:
	//
	// > If the BCP 47 primary language subtag matches the type attribute of a languageAlias
	// > element in Supplemental Data, replace the language subtag with the replacement value.
	// > 1. ...
	// > 2. Five special deprecated grandfathered codes (such as i-default) are in type
	// attributes, and are also replaced.
	// > 3. ...
	//
	// So let's assume grandfathered tags are treated as 'primary language subtag' if and only
	// if no additional subtags are present. Because in all other cases, we don't really have a
	// grandfathered tag, but only some arbitrary combination of random subtags.
	//
	// Basically what we expect here is that only grandfathered without any additional subtags
	// are canonicalised to their modern form and in all other cases they're left as is.
	//
	// Not all language tag processor will pass this test, for example because they don't order
	// variant subtags in alphabetical order or they're too eager when detecting grandfathered
	// tags. For example "zh-hakka-hakka" is accepted in some language tag processors, because
	// the language tag starts with a prefix which matches a grandfathered tag, and that prefix
	// is then canonicalised to "hak" and the second "hakka" is simply appended to it, so the
	// resulting tag is "hak-hakka". This is clearly wrong as far as ECMA-402 compliance is
	// concerned, because language tags are parsed and validated before any canonicalisation
	// happens. And during the validation step an error should be emitted, because the input
	// "zh-hakka-hakka" contains two identical variant subtags.
	//
	// From <https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag>:
	//
	// > does not include duplicate variant subtags
	//
	// So, if your implementation fails this assertion, but you still like to test the rest of
	// this file, a pull request to split this file seems the way to go!
	assert.sameValue(loc.toString(), canonical);

	assert.sameValue(loc.maximize().toString(), canonicalMax);
	assert.sameValue(loc.maximize().maximize().toString(), canonicalMax);

	assert.sameValue(loc.minimize().toString(), canonicalMin);
	assert.sameValue(loc.minimize().minimize().toString(), canonicalMin);

	assert.sameValue(loc.maximize().minimize().toString(), canonicalMin);
	assert.sameValue(loc.minimize().maximize().toString(), canonicalMax);
	}
	}