// blob: 9529abe7e05fed5b3867afce525628a676375385 [file] [log] [blame]
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
WScript.LoadScriptFile("..\\UnitTestFramework\\UnitTestFramework.js");
// Tests `string` against `re` and passes the boolean outcome, along with
// `message`, to `asserter`.
//   asserter - function invoked as asserter(result, message)
//   re       - object whose test() method is called with `string`
//   string   - input handed to re.test()
//   message  - forwarded unchanged as the asserter's second argument
function assertTest(asserter, re, string, message) {
    const isMatch = re.test(string);
    asserter(isMatch, message);
}
// Asserts, through assertTest and assert.isTrue, that `re` matches `string`.
//   re      - RegExp under test
//   string  - input the RegExp is expected to match
//   message - description forwarded to the asserter
// The parameters are declared explicitly (rather than forwarded through
// `arguments`) so the signature mirrors assertTest.
function assertMatches(re, string, message) {
    assertTest(assert.isTrue, re, string, message);
}
// Asserts, through assertTest and assert.isFalse, that `re` does not match
// `string`.
//   re      - RegExp under test
//   string  - input the RegExp is expected to reject
//   message - description forwarded to the asserter
// The declared parameters are passed on directly instead of being ignored in
// favour of `arguments`.
function assertDoesNotMatch(re, string, message) {
    assertTest(assert.isFalse, re, string, message);
}
// TODO: RegExp functions currently process strings as a list of code units as
// opposed to a list of code points. This causes a RegExp to match just
// the high surrogate. For example, /[^\ud800\udc00]/ matches
// "\ud800\udc00". This is due to "\ud800" being in the negated set and
// matching the first code unit in the string.
//
// Some of the patterns below have the format "^...$" to force the RegExp
// to match the string fully. Once the bug is fixed, the '^'s and '$'s
// can be removed. The bug # is 3679792.
// Test cases for character sets in RegExps that use the `u` flag. Each entry
// has a `name` and a `body` function whose assertions go through
// assertMatches / assertDoesNotMatch.
var tests = [
    {
        name: "A character set containing a negated character from a supplementary plane shouldn't match the character itself",
        body: function () {
            assertDoesNotMatch(/^[^\ud800\udc00]$/u, "\ud800\udc00", "Surrogate pair in RegExp and surrogate pair in string to test");
            assertDoesNotMatch(/^[^\ud800\udc00]$/u, "\u{10000}", "Surrogate pair in RegExp and code point in string to test");
            assertDoesNotMatch(/^[^\u{10000}]$/u, "\ud800\udc00", "Code point in RegExp and surrogate pair in string to test");
            assertDoesNotMatch(/^[^\u{10000}]$/u, "\u{10000}", "Code point in RegExp and code point in string to test");
        }
    },
    {
        name: "A character set containing a negated character from a supplementary plane should match other characters",
        body: function () {
            // Code points above U+FFFF must be written as \u{...}: with the
            // `u` flag, "\u10000" is parsed as "\u1000" followed by "0".
            assertMatches(/^[^\ud800\udc00]$/u, "\ud801\udc01", "Surrogate pair in RegExp and surrogate pair in string to test");
            assertMatches(/^[^\ud800\udc00]$/u, "\u{10101}", "Surrogate pair in RegExp and code point in string to test");
            assertMatches(/^[^\u{10000}]$/u, "\ud801\udc01", "Code point in RegExp and surrogate pair in string to test");
            assertMatches(/^[^\u{10000}]$/u, "\u{10101}", "Code point in RegExp and code point in string to test");
            assertMatches(/^[^\u{10000}]$/u, "\u0345", "Code point in RegExp and code unit in string to test");
            assertMatches(/^[^\ud800\udc00]$/u, "\u0345", "Surrogate pair in RegExp and code unit in string to test");
        }
    },
    {
        name: "A character set containing a negated character from the basic plane should match characters from supplementary planes",
        body: function () {
            // A single basic-plane character (U+0345), as the test name describes.
            assertMatches(/^[^\u0345]$/u, "\ud800\udc00", "Surrogate pair");
            assertMatches(/^[^\u0345]$/u, "\u{10000}", "Code point");
        }
    },
    {
        name: "A character set containing a range spanning multiple planes should match characters from all those planes",
        body: function () {
            const re = /^[\u0000-\u{10FFFF}]$/u;
            const numberOfPlanes = 17;
            // Builds the one-code-point string at offset `code` within `plane`.
            const getCharacterInPlane = function (plane, code) {
                return String.fromCodePoint(plane * 0x10000 + code);
            };

            for (let plane = 0; plane < numberOfPlanes; ++plane) {
                assertMatches(re, getCharacterInPlane(plane, 0x0000), "First character in plane #" + plane);
                assertMatches(re, getCharacterInPlane(plane, 0xFFFF), "Last character in plane #" + plane);
            }
        }
    },
    {
        name: "A dash character and a non-dash character following a full one shouldn't be interpreted as a range",
        body: function () {
            // Expected reading of the set: the range \ud800-\u{10FC00}, then a
            // literal '-', then the single character \u{10FC02}.
            const re = /^[\ud800-\udbff\udc00-\udbff\udc02]$/u;
            assertDoesNotMatch(re, "\udbff\udc01", "Shouldn't be in the second range");
            assertMatches(re, "-", "Second '-' treated as a normal character");
        }
    },
    {
        name: "Reserved characters shouldn't be ignored when they are in a character set together with characters from a supplementary plane",
        body: function () {
            assertMatches(/^[\ud800\udc00 \ud800]$/u, "\ud800", "Start of the reserved character range (\\ud800)");
            assertMatches(/^[\ud800\udc00 \udfff]$/u, "\udfff", "End of the reserved character range (\\udfff)");
        }
    },
    {
        name: "A high and a low surrogate part with a '-' between should be interpreted as a range",
        body: function () {
            assertMatches(/^[\ud800-\udfff]$/u, "\ud800", "Range start");
            assertMatches(/^[\ud800-\udfff]$/u, "\udfff", "Range end");
            // We had a bug where we interpreted the character set below as [\ud800\udfff] and omitted '-'.
            assertDoesNotMatch(/^[\ud800-\udfff]$/u, "\ud800\udfff", "Not a surrogate pair");
        }
    }
];
// Run all test cases; output is verbose unless the first WScript argument is "summary".
testRunner.runTests(tests, { verbose: WScript.Arguments[0] !== "summary" });