LayoutTests/js/dom/encode-URI-test.html - WebKit - Git at Google

 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">

 <html>

 <head>
 <meta http-equiv="content-type" content="text/html; charset=UTF-8">
 <title>test of JavaScript URI encoding and decoding methods</title>
 </head>

 <body>

 <script type="text/javascript">

 // When a testRunner object is present (presumably the layout-test harness), ask it for text output.
 if (window.testRunner) {
     testRunner.dumpAsText();
 }

 // --------

 // Helper functions.

 // Converts a value in the range 0..15 into one hexadecimal digit.
 // 10..15 become the strings "A".."F"; smaller values are returned unchanged (as numbers),
 // so callers depend on string concatenation to turn them into text.
 function hexDigit(number)
 {
     // 55 is the character code of "A" minus 10.
     return number >= 10 ? String.fromCharCode(number + 55) : number;
 }

 // Returns a double-quoted, ASCII-only rendering of the string s for use in failure messages.
 // Backslash, double quote, newline, carriage return and tab get their usual backslash escapes.
 // Any other code unit outside printable ASCII (0x20..0x7E) is written as \xHH when it fits in
 // one byte and as \uHHHH otherwise. The empty string is reported as the unquoted text
 // "empty string".
 function printable(s)
 {
     if (s == "")
         return "empty string";
     var p = "";
     for (var i = 0; i < s.length; i++) {
         var c = s.charAt(i);
         var cc = s.charCodeAt(i);
         if (c == "\\") {
             p += "\\\\";
         } else if (c == "\"") {
             p += "\\\"";
         } else if (c == "\n") {
             p += "\\n";
         } else if (c == "\r") {
             p += "\\r";
         } else if (c == "\t") {
             p += "\\t";
         } else if (cc >= 0x20 && cc < 0x7F) {
             // Printable ASCII starts at 0x20 (space); control characters below it must be escaped.
             p += c;
         } else if (cc <= 0xFF) {
             p += "\\x" + hexDigit(cc >> 4) + hexDigit(cc & 0xF);
         } else {
             // charCodeAt() never yields more than 0xFFFF, so four hex digits always suffice.
             p += "\\u" + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
         }
     }
     return "\"" + p + "\"";
 }

 // Returns the percent-encoded text the URI encoding functions are expected to produce for
 // the character code c. Only ASCII codes and a fixed set of larger codes are supported;
 // anything else raises an alert and yields "?".
 function encodedCharacter(c)
 {
     // Gecko emits UTF-8 here and WinIE 6 does not. UTF-8 gives encodings that actually work
     // in URLs, so JavaScriptCore follows Gecko on this point and WinIE on most others.

     // A full UTF-8 encoder is not needed for the test; these hand-computed cases are enough.
     var knownEncodings = [
         [0x80, "%C2%80"],
         [0x7FF, "%DF%BF"],
         [0x800, "%E0%A0%80"],
         [0x2022, "%E2%80%A2"],
         [0xD7FF, "%ED%9F%BF"],
         [0xD800, "%ED%A0%80"],
         [0xE000, "%EE%80%80"],
         [0xFFFC, "%EF%BF%BC"],
         [0xFFFD, "%EF%BF%BD"]
     ];
     for (var i = 0; i < knownEncodings.length; i++) {
         if (knownEncodings[i][0] === c)
             return knownEncodings[i][1];
     }

     if (c < 0 || c > 0x7F) {
         window.alert("encodedCharacter doesn't know how to escape character code " + c);
         return "?";
     }

     // ASCII: a single %XX escape built from the high and low nibbles.
     var highNibble = c >> 4;
     return "%" + hexDigit(highNibble) + hexDigit(c - (highNibble << 4));
 }

 // Returns the text escape() is expected to produce for the character code c.
 // Only ASCII codes and a fixed set of larger codes are supported; anything else raises an
 // alert and yields "?".
 function escapedCharacter(c)
 {
     // Gecko uses UTF-8 here, but this test follows Win IE: Latin1 (%XX) for code points up
     // to 255 and %uXXXX for everything above.
     var knownEscapes = [
         [0x80, "%80"],
         [0x7FF, "%u07FF"],
         [0x800, "%u0800"],
         [0x2022, "%u2022"],
         [0xD7FF, "%uD7FF"],
         [0xD800, "%uD800"],
         [0xE000, "%uE000"],
         [0xFFFC, "%uFFFC"],
         [0xFFFD, "%uFFFD"]
     ];
     for (var i = 0; i < knownEscapes.length; i++) {
         if (knownEscapes[i][0] === c)
             return knownEscapes[i][1];
     }

     if (c < 0 || c > 0x7F) {
         window.alert("escapedCharacter doesn't know how to escape character code " + c);
         return "?";
     }

     // ASCII: a single %XX escape built from the high and low nibbles.
     var highNibble = c >> 4;
     return "%" + hexDigit(highNibble) + hexDigit(c - (highNibble << 4));
 }

 // Calls f once per character position of s, in order, passing the numeric character code.
 function forEachCharacterCode(f, s)
 {
     var index = 0;
     while (index < s.length) {
         f(s.charCodeAt(index));
         index++;
     }
 }

 // Calls the function named functionName (resolved by eval) with parameter as its only
 // argument. Returns the function's result, or the string "exception" if the call throws.
 function call(functionName, parameter)
 {
     // Kept local so this helper neither creates nor overwrites a global variable
     // (an undeclared assignment would also throw in strict-mode code).
     var result;
     try {
         // NOTE(review): eval is acceptable only because functionName comes from the fixed
         // names used by this test; the argument is passed via the local variable, not as source text.
         result = eval(functionName + "(parameter)");
     } catch (e) {
         result = "exception";
     }
     return result;
 }

 // --------

 // Build up tables with expected results.

 // Expected results that replace the computed default in test(), keyed by "functionName(argument)".
 var expectedResult = {};

 // Records that calling function f on the single character with code c is expected to
 // return that character unchanged.
 function addExpectedNonEscaped(f, c)
 {
     var character = String.fromCharCode(c);
     expectedResult[f + "(" + character + ")"] = character;
 }

 // Marks the character with code c as passed through unchanged by escape, encodeURI and
 // encodeURIComponent.
 function addNoEscape(c)
 {
     var functionNames = ["escape", "encodeURI", "encodeURIComponent"];
     for (var i = 0; i < functionNames.length; i++)
         addExpectedNonEscaped(functionNames[i], c);
 }

 // Marks the character with code c as passed through unchanged by escape only.
 function addEscapeNoEscape(c)
 {
     var functionName = "escape";
     addExpectedNonEscaped(functionName, c);
 }

 // Marks the character with code c as passed through unchanged by both encodeURI and
 // encodeURIComponent.
 function addURIComponentNoEscape(c)
 {
     var functionNames = ["encodeURI", "encodeURIComponent"];
     for (var i = 0; i < functionNames.length; i++)
         addExpectedNonEscaped(functionNames[i], c);
 }

 // Marks the character with code c as passed through unchanged by encodeURI, and records
 // that decodeURI applied to its escape sequence (upper- or lower-case hex) is expected to
 // return the encoded form rather than the character itself.
 function addURINoEscape(c)
 {
     addExpectedNonEscaped("encodeURI", c);

     var upperCaseKey = "decodeURI(" + escapedCharacter(c) + ")";
     expectedResult[upperCaseKey] = encodedCharacter(c);

     var lowerCaseKey = "decodeURI(" + escapedCharacter(c).toLowerCase() + ")";
     expectedResult[lowerCaseKey] = encodedCharacter(c).toLowerCase();
 }

 // Register, group by group, the characters each function is expected to leave unescaped.
 // Wrapped in a function so the helper table does not become a global.
 (function () {
     var passThroughGroups = [
         [addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"],
         [addEscapeNoEscape, "+/"],
         [addURINoEscape, "@#$&+,/:;=?"],
         [addURIComponentNoEscape, "!'()~"]
     ];
     for (var i = 0; i < passThroughGroups.length; i++)
         forEachCharacterCode(passThroughGroups[i][0], passThroughGroups[i][1]);
 })();

 // WinIE 6's escape function does not escape @, although Gecko's does.
 expectedResult["escape(@)"] = "@";

 // --------

 // Run tests.

 // Number of checks that have failed so far.
 var failureCount = 0;

 // Calls functionName on parameter and compares the outcome with desiredResult, or with the
 // entry stored in expectedResult for this exact call when there is one. A mismatch is
 // written to the document and counted in failureCount.
 function test(functionName, parameter, desiredResult)
 {
     var expected = expectedResult[functionName + "(" + parameter + ")"] || desiredResult;
     var result = call(functionName, parameter);
     if (result == expected)
         return;

     var s = "called " + functionName + " on " + printable(parameter) + " and got " + printable(result) + " instead of " + printable(expected);
     document.writeln("<p>" + s + "</p>");
     failureCount += 1;
 }

 // Checks escape against escapeExpected, then encodeURI and encodeURIComponent against
 // encodeExpected, all for the same parameter.
 function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
 {
     test("escape", parameter, escapeExpected);

     var encoders = ["encodeURI", "encodeURIComponent"];
     for (var i = 0; i < encoders.length; i++)
         test(encoders[i], parameter, encodeExpected);
 }

 // Checks that unescape applied to parameter yields expected.
 function testUnescape(parameter, expected)
 {
     var functionName = "unescape";
     test(functionName, parameter, expected);
 }

 // Checks that decodeURI and decodeURIComponent applied to parameter both yield expected.
 // An expectation of U+FFFE or U+FFFF is replaced by U+FFFD before checking.
 function testDecode(parameter, expected)
 {
     var isReplaced = expected == "\uFFFE" || expected == "\uFFFF";
     var decodedExpectation = isReplaced ? "\uFFFD" : expected;

     var decoders = ["decodeURI", "decodeURIComponent"];
     for (var i = 0; i < decoders.length; i++)
         test(decoders[i], parameter, decodedExpectation);
 }

 // Checks unescape against expectedUnescape, then decodeURI and decodeURIComponent against
 // expectedDecode, all for the same parameter.
 function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
 {
     // The same single check that testUnescape() performs.
     test("unescape", parameter, expectedUnescape);
     testDecode(parameter, expectedDecode);
 }

 // Runs the full round trip for the character code c: escaping and encoding the character,
 // then unescaping and decoding the resulting text in both upper- and lower-case hex.
 function testCharacter(c)
 {
     var s = String.fromCharCode(c);
     var escaped = escapedCharacter(c);
     var encoded = encodedCharacter(c);

     testEscapeAndEncode(s, escaped, encoded);

     var escapedForms = [escaped, escaped.toLowerCase()];
     for (var i = 0; i < escapedForms.length; i++)
         testUnescape(escapedForms[i], s);

     var encodedForms = [encoded, encoded.toLowerCase()];
     for (var j = 0; j < encodedForms.length; j++)
         testDecode(encodedForms[j], s);
 }

 // Round-trip every character code from 0 through 128, then the specific larger codes
 // that escapedCharacter() and encodedCharacter() have table entries for.
 for (var c = 0; c <= 128; c++) {
     testCharacter(c);
 }
 testCharacter(0x7FF);
 testCharacter(0x800);
 testCharacter(0x2022);
 testCharacter(0xD7FF);
 testCharacter(0xE000);
 testCharacter(0xFFFC);
 testCharacter(0xFFFD);

 // These tests are currently turned off because it's not yet entirely clear what correct behavior
 // is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns
 // U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same.
 // And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater
 // than U+FFFF (outside the BMP) from working properly.

 //testCharacter(0xD800);
 //testCharacter(0xDBFF);
 //testCharacter(0xDC00);
 //testCharacter(0xDFFF);
 //testCharacter(0xFFFE);
 //testCharacter(0xFFFF);
 //testCharacter(0x10000);

 // Malformed or non-UTF-8 escapes: the second argument is what unescape should return and the
 // third what decodeURI/decodeURIComponent should return ("exception" is the value call()
 // reports when the function throws).
 testUnescapeAndDecode("%", "%", "exception");
 testUnescapeAndDecode("%0", "%0", "exception");
 testUnescapeAndDecode("%a", "%a", "exception");
 testUnescapeAndDecode("%u", "%u", "exception");
 testUnescapeAndDecode("%xx", "%xx", "exception");
 testUnescapeAndDecode("%u004", "%u004", "exception");
 testUnescapeAndDecode("%u0041", "A", "exception");
 testUnescapeAndDecode("%uxxxx", "%uxxxx", "exception");

 // Unescaped non-ASCII input is expected to pass through all three functions unchanged.
 testUnescapeAndDecode(String.fromCharCode(0x80), String.fromCharCode(0x80), String.fromCharCode(0x80));
 testUnescapeAndDecode(String.fromCharCode(0xD800), String.fromCharCode(0xD800), String.fromCharCode(0xD800));

 // A complete two-byte UTF-8 sequence decodes; a lone or half-escaped lead byte must throw.
 testUnescapeAndDecode("%C2%80", String.fromCharCode(0xC2) + String.fromCharCode(0x80), String.fromCharCode(0x80));
 testUnescapeAndDecode("%C2", String.fromCharCode(0xC2), "exception");
 testUnescapeAndDecode("%C2" + String.fromCharCode(0x80), String.fromCharCode(0xC2) + String.fromCharCode(0x80), "exception");

 // The characters below have to be literal because String.fromCharCode will make a single character
 // and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF.
 // For most JavaScript engines, this will turn into two characters because they use UTF-16
 // instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16
 // behavior, forbids it, or doesn't say either way.
 testEscapeAndEncode("𐀀", "%uD800%uDC00", "%F0%90%80%80");
 testUnescapeAndDecode("%F0%90%80%80", "\xF0\x90\x80\x80", "𐀀");
 testEscapeAndEncode("𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5");
 testUnescapeAndDecode("%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5", "𦏵");
 testEscapeAndEncode("𯿿", "%uD87F%uDFFF", "%F0%AF%BF%BF");
 testUnescapeAndDecode("%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF", "𯿿");

 // --------

 // Summarize.

 // Write the closing line: either the number of failed checks or "No failures.".
 var failuresMessage = failureCount ? failureCount + " tests failed." : "No failures.";
 document.writeln("<p>Testing complete. " + failuresMessage + "</p>");

 // --------

 </script>

 </body>

 </html>
	<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">

	<html>

	<head>
	<meta http-equiv="content-type" content="text/html; charset=UTF-8">
	<title>test of JavaScript URI encoding and decoding methods</title>
	</head>

	<body>

	<script type="text/javascript">

	// When a testRunner object is present (presumably the layout-test harness), ask it for text output.
	if (window.testRunner) {
	    testRunner.dumpAsText();
	}

	// --------

	// Helper functions.

	// Converts a value in the range 0..15 into one hexadecimal digit.
	// 10..15 become the strings "A".."F"; smaller values are returned unchanged (as numbers),
	// so callers depend on string concatenation to turn them into text.
	function hexDigit(number)
	{
	    // 55 is the character code of "A" minus 10.
	    return number >= 10 ? String.fromCharCode(number + 55) : number;
	}

	// Returns a double-quoted, ASCII-only rendering of the string s for use in failure messages.
	// Backslash, double quote, newline, carriage return and tab get their usual backslash escapes.
	// Any other code unit outside printable ASCII (0x20..0x7E) is written as \xHH when it fits in
	// one byte and as \uHHHH otherwise. The empty string is reported as the unquoted text
	// "empty string".
	function printable(s)
	{
	    if (s == "")
	        return "empty string";
	    var p = "";
	    for (var i = 0; i < s.length; i++) {
	        var c = s.charAt(i);
	        var cc = s.charCodeAt(i);
	        if (c == "\\") {
	            p += "\\\\";
	        } else if (c == "\"") {
	            p += "\\\"";
	        } else if (c == "\n") {
	            p += "\\n";
	        } else if (c == "\r") {
	            p += "\\r";
	        } else if (c == "\t") {
	            p += "\\t";
	        } else if (cc >= 0x20 && cc < 0x7F) {
	            // Printable ASCII starts at 0x20 (space); control characters below it must be escaped.
	            p += c;
	        } else if (cc <= 0xFF) {
	            p += "\\x" + hexDigit(cc >> 4) + hexDigit(cc & 0xF);
	        } else {
	            // charCodeAt() never yields more than 0xFFFF, so four hex digits always suffice.
	            p += "\\u" + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
	        }
	    }
	    return "\"" + p + "\"";
	}

	// Returns the percent-encoded text the URI encoding functions are expected to produce for
	// the character code c. Only ASCII codes and a fixed set of larger codes are supported;
	// anything else raises an alert and yields "?".
	function encodedCharacter(c)
	{
	    // UTF-8 is what Gecko does, but not what WinIE 6 does.
	    // It makes much more sense, though, to produce encodings that actually work in URLs.
	    // So for JavaScriptCore, we want to match Gecko on this, WinIE on most other things.

	    // Instead of writing a JavaScript implementation of UTF-8 escaping, just do some specific cases here.
	    switch (c) {
	        case 0x80: return "%C2%80";
	        case 0x7FF: return "%DF%BF";
	        case 0x800: return "%E0%A0%80";
	        case 0x2022: return "%E2%80%A2";
	        case 0xD7FF: return "%ED%9F%BF";
	        case 0xD800: return "%ED%A0%80";
	        case 0xE000: return "%EE%80%80";
	        case 0xFFFC: return "%EF%BF%BC";
	        case 0xFFFD: return "%EF%BF%BD";
	    }

	    // Anything non-ASCII that is not listed above is unsupported by this helper.
	    if (c < 0 || c > 0x7F) {
	        window.alert("encodedCharacter doesn't know how to escape character code " + c);
	        return "?";
	    }

	    // ASCII: a single %XX escape built from the high and low nibbles.
	    return "%" + hexDigit(c >> 4) + hexDigit(c - (c >> 4 << 4));
	}

	// Returns the text escape() is expected to produce for the character code c.
	// Only ASCII codes and a fixed set of larger codes are supported; anything else raises an
	// alert and yields "?".
	function escapedCharacter(c)
	{
	    // UTF-8 is what Gecko does, but we do what Win IE does instead. We use Latin1 for
	    // unicode characters less than or equal to 255, and %u encoding for everything else.
	    switch (c) {
	        case 0x80: return "%80";
	        case 0x7FF: return "%u07FF";
	        case 0x800: return "%u0800";
	        case 0x2022: return "%u2022";
	        case 0xD7FF: return "%uD7FF";
	        case 0xD800: return "%uD800";
	        case 0xE000: return "%uE000";
	        case 0xFFFC: return "%uFFFC";
	        case 0xFFFD: return "%uFFFD";
	    }

	    // Anything non-ASCII that is not listed above is unsupported by this helper.
	    if (c < 0 || c > 0x7F) {
	        window.alert("escapedCharacter doesn't know how to escape character code " + c);
	        return "?";
	    }

	    // ASCII: a single %XX escape built from the high and low nibbles.
	    return "%" + hexDigit(c >> 4) + hexDigit(c - (c >> 4 << 4));
	}

	// Calls f once per character position of s, in order, passing the numeric character code.
	function forEachCharacterCode(f, s)
	{
	    var index = 0;
	    while (index < s.length) {
	        f(s.charCodeAt(index));
	        index++;
	    }
	}

	// Calls the function named functionName (resolved by eval) with parameter as its only
	// argument. Returns the function's result, or the string "exception" if the call throws.
	function call(functionName, parameter)
	{
	    // Kept local so this helper neither creates nor overwrites a global variable
	    // (an undeclared assignment would also throw in strict-mode code).
	    var result;
	    try {
	        // NOTE(review): eval is acceptable only because functionName comes from the fixed
	        // names used by this test; the argument is passed via the local variable, not as source text.
	        result = eval(functionName + "(parameter)");
	    } catch (e) {
	        result = "exception";
	    }
	    return result;
	}

	// --------

	// Build up tables with expected results.

	// Expected results that replace the computed default in test(), keyed by "functionName(argument)".
	var expectedResult = {};

	// Records that calling function f on the single character with code c is expected to
	// return that character unchanged.
	function addExpectedNonEscaped(f, c)
	{
	    var character = String.fromCharCode(c);
	    expectedResult[f + "(" + character + ")"] = character;
	}

	// Marks the character with code c as passed through unchanged by escape, encodeURI and
	// encodeURIComponent.
	function addNoEscape(c)
	{
	    var functionNames = ["escape", "encodeURI", "encodeURIComponent"];
	    for (var i = 0; i < functionNames.length; i++)
	        addExpectedNonEscaped(functionNames[i], c);
	}

	// Marks the character with code c as passed through unchanged by escape only.
	function addEscapeNoEscape(c)
	{
	    var functionName = "escape";
	    addExpectedNonEscaped(functionName, c);
	}

	// Marks the character with code c as passed through unchanged by both encodeURI and
	// encodeURIComponent.
	function addURIComponentNoEscape(c)
	{
	    var functionNames = ["encodeURI", "encodeURIComponent"];
	    for (var i = 0; i < functionNames.length; i++)
	        addExpectedNonEscaped(functionNames[i], c);
	}

	// Marks the character with code c as passed through unchanged by encodeURI, and records
	// that decodeURI applied to its escape sequence (upper- or lower-case hex) is expected to
	// return the encoded form rather than the character itself.
	function addURINoEscape(c)
	{
	    addExpectedNonEscaped("encodeURI", c);

	    var upperCaseKey = "decodeURI(" + escapedCharacter(c) + ")";
	    expectedResult[upperCaseKey] = encodedCharacter(c);

	    var lowerCaseKey = "decodeURI(" + escapedCharacter(c).toLowerCase() + ")";
	    expectedResult[lowerCaseKey] = encodedCharacter(c).toLowerCase();
	}

	// Register, group by group, the characters each function is expected to leave unescaped.
	// Wrapped in a function so the helper table does not become a global.
	(function () {
	    var passThroughGroups = [
	        [addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"],
	        [addEscapeNoEscape, "+/"],
	        [addURINoEscape, "@#$&+,/:;=?"],
	        [addURIComponentNoEscape, "!'()~"]
	    ];
	    for (var i = 0; i < passThroughGroups.length; i++)
	        forEachCharacterCode(passThroughGroups[i][0], passThroughGroups[i][1]);
	})();

	// WinIE 6's escape function does not escape @, although Gecko's does.
	expectedResult["escape(@)"] = "@";

	// --------

	// Run tests.

	// Number of checks that have failed so far.
	var failureCount = 0;

	// Calls functionName on parameter and compares the outcome with desiredResult, or with the
	// entry stored in expectedResult for this exact call when there is one. A mismatch is
	// written to the document and counted in failureCount.
	function test(functionName, parameter, desiredResult)
	{
	    var expected = expectedResult[functionName + "(" + parameter + ")"] || desiredResult;
	    var result = call(functionName, parameter);
	    if (result == expected)
	        return;

	    var s = "called " + functionName + " on " + printable(parameter) + " and got " + printable(result) + " instead of " + printable(expected);
	    document.writeln("<p>" + s + "</p>");
	    failureCount += 1;
	}

	// Checks escape against escapeExpected, then encodeURI and encodeURIComponent against
	// encodeExpected, all for the same parameter.
	function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
	{
	    test("escape", parameter, escapeExpected);

	    var encoders = ["encodeURI", "encodeURIComponent"];
	    for (var i = 0; i < encoders.length; i++)
	        test(encoders[i], parameter, encodeExpected);
	}

	// Checks that unescape applied to parameter yields expected.
	function testUnescape(parameter, expected)
	{
	    var functionName = "unescape";
	    test(functionName, parameter, expected);
	}

	// Checks that decodeURI and decodeURIComponent applied to parameter both yield expected.
	// An expectation of U+FFFE or U+FFFF is replaced by U+FFFD before checking.
	function testDecode(parameter, expected)
	{
	    if (expected == "\uFFFE" || expected == "\uFFFF")
	        expected = "\uFFFD";

	    test("decodeURI", parameter, expected);
	    test("decodeURIComponent", parameter, expected);
	}

	// Checks unescape against expectedUnescape, then decodeURI and decodeURIComponent against
	// expectedDecode, all for the same parameter.
	function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
	{
	    // The same single check that testUnescape() performs.
	    test("unescape", parameter, expectedUnescape);
	    testDecode(parameter, expectedDecode);
	}

	// Runs the full round trip for the character code c: escaping and encoding the character,
	// then unescaping and decoding the resulting text in both upper- and lower-case hex.
	function testCharacter(c)
	{
	    var s = String.fromCharCode(c);
	    var escaped = escapedCharacter(c);
	    var encoded = encodedCharacter(c);

	    testEscapeAndEncode(s, escaped, encoded);

	    var escapedForms = [escaped, escaped.toLowerCase()];
	    for (var i = 0; i < escapedForms.length; i++)
	        testUnescape(escapedForms[i], s);

	    var encodedForms = [encoded, encoded.toLowerCase()];
	    for (var j = 0; j < encodedForms.length; j++)
	        testDecode(encodedForms[j], s);
	}

	// Round-trip every character code from 0 through 128, then the specific larger codes
	// that escapedCharacter() and encodedCharacter() have table entries for.
	for (var c = 0; c <= 128; c++) {
	testCharacter(c);
	}
	testCharacter(0x7FF);
	testCharacter(0x800);
	testCharacter(0x2022);
	testCharacter(0xD7FF);
	testCharacter(0xE000);
	testCharacter(0xFFFC);
	testCharacter(0xFFFD);

	// These tests are currently turned off because it's not yet entirely clear what correct behavior
	// is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns
	// U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same.
	// And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater
	// than U+FFFF (outside the BMP) from working properly.

	//testCharacter(0xD800);
	//testCharacter(0xDBFF);
	//testCharacter(0xDC00);
	//testCharacter(0xDFFF);
	//testCharacter(0xFFFE);
	//testCharacter(0xFFFF);
	//testCharacter(0x10000);

	// Malformed or non-UTF-8 escapes: the second argument is what unescape should return and the
	// third what decodeURI/decodeURIComponent should return ("exception" is the value call()
	// reports when the function throws).
	testUnescapeAndDecode("%", "%", "exception");
	testUnescapeAndDecode("%0", "%0", "exception");
	testUnescapeAndDecode("%a", "%a", "exception");
	testUnescapeAndDecode("%u", "%u", "exception");
	testUnescapeAndDecode("%xx", "%xx", "exception");
	testUnescapeAndDecode("%u004", "%u004", "exception");
	testUnescapeAndDecode("%u0041", "A", "exception");
	testUnescapeAndDecode("%uxxxx", "%uxxxx", "exception");

	// Unescaped non-ASCII input is expected to pass through all three functions unchanged.
	testUnescapeAndDecode(String.fromCharCode(0x80), String.fromCharCode(0x80), String.fromCharCode(0x80));
	testUnescapeAndDecode(String.fromCharCode(0xD800), String.fromCharCode(0xD800), String.fromCharCode(0xD800));

	// A complete two-byte UTF-8 sequence decodes; a lone or half-escaped lead byte must throw.
	testUnescapeAndDecode("%C2%80", String.fromCharCode(0xC2) + String.fromCharCode(0x80), String.fromCharCode(0x80));
	testUnescapeAndDecode("%C2", String.fromCharCode(0xC2), "exception");
	testUnescapeAndDecode("%C2" + String.fromCharCode(0x80), String.fromCharCode(0xC2) + String.fromCharCode(0x80), "exception");

	// The characters below have to be literal because String.fromCharCode will make a single character
	// and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF.
	// For most JavaScript engines, this will turn into two characters because they use UTF-16
	// instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16
	// behavior, forbids it, or doesn't say either way.
	testEscapeAndEncode("𐀀", "%uD800%uDC00", "%F0%90%80%80");
	testUnescapeAndDecode("%F0%90%80%80", "\xF0\x90\x80\x80", "𐀀");
	testEscapeAndEncode("𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5");
	testUnescapeAndDecode("%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5", "𦏵");
	testEscapeAndEncode("𯿿", "%uD87F%uDFFF", "%F0%AF%BF%BF");
	testUnescapeAndDecode("%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF", "𯿿");

	// --------

	// Summarize.

	// Write the closing line: either the number of failed checks or "No failures.".
	var failuresMessage = failureCount ? failureCount + " tests failed." : "No failures.";
	document.writeln("<p>Testing complete. " + failuresMessage + "</p>");

	// --------

	</script>

	</body>

	</html>