| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> |
| |
| <html> |
| |
| <head> |
| <meta http-equiv="content-type" content="text/html; charset=UTF-8"> |
| <title>test of JavaScript URI encoding and decoding methods</title> |
| </head> |
| |
| <body> |
| |
| <script type="text/javascript"> |
| |
// Only talk to testRunner when the hosting environment provides one on window;
// when window.testRunner is absent this statement does nothing.
if (window.testRunner) {
    testRunner.dumpAsText();
}
| |
| // -------- |
| |
| // Helper functions. |
| |
// Converts a value in the range 0-15 to its single uppercase hexadecimal digit.
// The result is always a string, so that adding two digits together concatenates
// them ("1" + "2" == "12") instead of summing them numerically.
function hexDigit(number)
{
    if (number >= 10)
        return String.fromCharCode(number + 55); // 10 + 55 == 65, the character code of "A"
    return String(number);
}
| |
// Returns a double-quoted, ASCII-only rendering of the string s for use in failure
// messages, or the words "empty string" when s is empty.
//
// Backslash, double quote, newline, carriage return and tab get their usual backslash
// escapes. Printable ASCII (space, 0x20, through tilde, 0x7E) is copied unchanged.
// Every other code unit is written as \xHH (up to 0xFF) or \uHHHH.
function printable(s)
{
    if (s === "")
        return "empty string";
    var p = "";
    for (var i = 0; i < s.length; i++) {
        var c = s.charAt(i);
        var cc = s.charCodeAt(i);
        if (c == "\\") {
            p += "\\\\";
        } else if (c == "\"") {
            p += "\\\"";
        } else if (c == "\n") {
            p += "\\n";
        } else if (c == "\r") {
            p += "\\r";
        } else if (c == "\t") {
            p += "\\t";
        } else if (cc >= 0x20 && cc < 0x7F) {
            // The lower bound is hexadecimal 0x20 (space); a decimal 20 here would let
            // the control characters 0x14 through 0x1F into the output unescaped.
            p += c;
        } else if (cc <= 0xFF) {
            p += "\\x" + hexDigit(cc >> 4) + hexDigit(cc & 0xF);
        } else {
            // charCodeAt returns a single UTF-16 code unit, so cc never exceeds 0xFFFF
            // and four hex digits are always enough.
            p += "\\u" + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
        }
    }
    return "\"" + p + "\"";
}
| |
// Returns the percent-encoded form that encodeURI/encodeURIComponent are expected to
// produce for the character code c.
//
// UTF-8 is what Gecko does, but not what WinIE 6 does. It makes much more sense, though,
// to produce encodings that actually work in URLs, so for JavaScriptCore we want to match
// Gecko on this and WinIE on most other things.
//
// There is deliberately no general UTF-8 encoder here: non-ASCII codes are answered from
// a small table of known cases, and any other non-ASCII code raises an alert and yields "?".
function encodedCharacter(c)
{
    var knownEncodings = [
        [0x80, "%C2%80"],
        [0x7FF, "%DF%BF"],
        [0x800, "%E0%A0%80"],
        [0x2022, "%E2%80%A2"],
        [0xD7FF, "%ED%9F%BF"],
        [0xD800, "%ED%A0%80"],
        [0xE000, "%EE%80%80"],
        [0xFFFC, "%EF%BF%BC"],
        [0xFFFD, "%EF%BF%BD"]
    ];
    for (var i = 0; i < knownEncodings.length; i++) {
        if (knownEncodings[i][0] === c)
            return knownEncodings[i][1];
    }

    var isOutsideAscii = c < 0 || c > 0x7F;
    if (isOutsideAscii) {
        window.alert("encodedCharacter doesn't know how to escape character code " + c);
        return "?";
    }

    // ASCII: a percent sign followed by the high and low four bits as hex digits.
    var highNibble = c >> 4;
    var lowNibble = c - (highNibble << 4);
    return "%" + hexDigit(highNibble) + hexDigit(lowNibble);
}
| |
// Returns the form that escape() is expected to produce for the character code c.
//
// UTF-8 is what Gecko does, but we do what Win IE does instead: Latin1 (%XX) for
// characters up to 255 and %uXXXX for everything else.
//
// Non-ASCII codes are answered from a small table of known cases; any other non-ASCII
// code raises an alert and yields "?".
function escapedCharacter(c)
{
    var knownEscapes = [
        [0x80, "%80"],
        [0x7FF, "%u07FF"],
        [0x800, "%u0800"],
        [0x2022, "%u2022"],
        [0xD7FF, "%uD7FF"],
        [0xD800, "%uD800"],
        [0xE000, "%uE000"],
        [0xFFFC, "%uFFFC"],
        [0xFFFD, "%uFFFD"]
    ];
    for (var i = 0; i < knownEscapes.length; i++) {
        if (knownEscapes[i][0] === c)
            return knownEscapes[i][1];
    }

    var isOutsideAscii = c < 0 || c > 0x7F;
    if (isOutsideAscii) {
        window.alert("escapedCharacter doesn't know how to escape character code " + c);
        return "?";
    }

    // ASCII: a percent sign followed by the high and low four bits as hex digits.
    var highNibble = c >> 4;
    var lowNibble = c - (highNibble << 4);
    return "%" + hexDigit(highNibble) + hexDigit(lowNibble);
}
| |
// Calls f once for each position in the string s, in order, passing the character
// code found at that position as the only argument.
function forEachCharacterCode(f, s)
{
    var index = 0;
    while (index < s.length) {
        f(s.charCodeAt(index));
        index += 1;
    }
}
| |
// Invokes the function named by functionName with parameter as its only argument and
// returns what it returns. If the call throws, the string "exception" is returned
// instead, so callers can compare against "exception" as an expected outcome.
//
// NOTE(review): the name is resolved with eval. Every caller in this file passes a
// hard-coded function name; do not route untrusted text through functionName.
function call(functionName, parameter)
{
    // Declared locally: assigning to an undeclared "result" would create a global in
    // sloppy mode and throw a ReferenceError in strict mode.
    var result;
    try {
        result = eval(functionName + "(parameter)");
    } catch (e) {
        result = "exception";
    }
    return result;
}
| |
| // -------- |
| |
| // Build up tables with expected results. |
| |
// Overrides for the default expectations, keyed by "functionName(argument)".
var expectedResult = {};

// Records that calling the function named f on the single character with code c is
// expected to return that character unchanged.
function addExpectedNonEscaped(f, c)
{
    var character = String.fromCharCode(c);
    var key = f + "(" + character + ")";
    expectedResult[key] = character;
}
| |
// Marks the character with code c as left unchanged by all three of escape,
// encodeURI and encodeURIComponent.
function addNoEscape(c)
{
    var functionNames = ["escape", "encodeURI", "encodeURIComponent"];
    for (var i = 0; i < functionNames.length; i++)
        addExpectedNonEscaped(functionNames[i], c);
}
| |
// Marks the character with code c as left unchanged by escape only; nothing is
// recorded for encodeURI or encodeURIComponent.
function addEscapeNoEscape(c)
{
    var functionName = "escape";
    addExpectedNonEscaped(functionName, c);
}
| |
// Marks the character with code c as left unchanged by both encodeURI and
// encodeURIComponent; nothing is recorded for escape.
function addURIComponentNoEscape(c)
{
    var functionNames = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < functionNames.length; i++)
        addExpectedNonEscaped(functionNames[i], c);
}
| |
// Marks the character with code c as one that encodeURI leaves unchanged and whose
// percent-encoded form decodeURI leaves unchanged, in both upper- and lower-case hex.
//
// The decodeURI keys are built from encodedCharacter(c) because that is the string the
// decode tests pass in and therefore the string the lookup is keyed by. For the ASCII
// characters registered in this file it equals escapedCharacter(c), so the recorded
// expectations are the same as before.
function addURINoEscape(c)
{
    addExpectedNonEscaped("encodeURI", c);

    var encoded = encodedCharacter(c);
    expectedResult["decodeURI(" + encoded + ")"] = encoded;

    var encodedLower = encoded.toLowerCase();
    expectedResult["decodeURI(" + encodedLower + ")"] = encodedLower;
}
| |
// Register, group by group, the characters that each family of functions leaves
// unescaped. Each entry pairs a registration function with the characters it applies to;
// the groups are processed in the order listed.
(function () {
    var unescapedGroups = [
        [addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"],
        [addEscapeNoEscape, "+/"],
        [addURINoEscape, "@#$&+,/:;=?"],
        [addURIComponentNoEscape, "!'()~"]
    ];
    for (var i = 0; i < unescapedGroups.length; i++)
        forEachCharacterCode(unescapedGroups[i][0], unescapedGroups[i][1]);
})();

// WinIE 6's escape function does not escape @, although Gecko's does.
expectedResult["escape(@)"] = "@";
| |
| // -------- |
| |
| // Run tests. |
| |
// Number of checks whose result did not match; reported once all tests have run.
var failureCount = 0;

// Calls the function named functionName on parameter and compares the outcome with the
// desired result. When expectedResult holds a truthy entry for "functionName(parameter)",
// that entry replaces desiredResult. A mismatch writes one paragraph describing it to the
// document and increments failureCount; a match produces no output.
function test(functionName, parameter, desiredResult)
{
    var override = expectedResult[functionName + "(" + parameter + ")"];
    var wanted = override ? override : desiredResult;

    var actual = call(functionName, parameter);
    if (actual == wanted)
        return;

    var description = "called " + functionName + " on " + printable(parameter) + " and got " + printable(actual) + " instead of " + printable(wanted);
    document.writeln("<p>" + description + "</p>");
    failureCount += 1;
}
| |
// Checks parameter against escape (expecting escapeExpected) and then against both
// encodeURI and encodeURIComponent (each expecting encodeExpected).
function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
{
    test("escape", parameter, escapeExpected);

    var encoders = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < encoders.length; i++)
        test(encoders[i], parameter, encodeExpected);
}
| |
// Checks that unescape applied to parameter gives expected.
function testUnescape(parameter, expected)
{
    var functionName = "unescape";
    test(functionName, parameter, expected);
}
| |
// Checks that decodeURI and decodeURIComponent applied to parameter both give expected.
// An expectation of U+FFFE or U+FFFF is replaced by U+FFFD before the checks are made.
function testDecode(parameter, expected)
{
    var isReplaced = expected == "\uFFFE" || expected == "\uFFFF";
    var wanted = isReplaced ? "\uFFFD" : expected;

    var decoders = ["decodeURI", "decodeURIComponent"];
    for (var i = 0; i < decoders.length; i++)
        test(decoders[i], parameter, wanted);
}
| |
// Runs the unescape check on parameter (expecting expectedUnescape) followed by the
// decodeURI/decodeURIComponent checks on the same parameter (expecting expectedDecode).
function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
{
    // unescape first, then the two decode functions
    testUnescape(parameter, expectedUnescape);

    testDecode(parameter, expectedDecode);
}
| |
// Runs the full round trip for the single character with code c: escape and encode the
// character, then unescape and decode the resulting strings back to the character, using
// both the upper-case form and its lower-cased variant as input.
function testCharacter(c)
{
    var character = String.fromCharCode(c);
    var escapedForm = escapedCharacter(c);
    var encodedForm = encodedCharacter(c);

    testEscapeAndEncode(character, escapedForm, encodedForm);

    var escapedInputs = [escapedForm, escapedForm.toLowerCase()];
    for (var i = 0; i < escapedInputs.length; i++)
        testUnescape(escapedInputs[i], character);

    var encodedInputs = [encodedForm, encodedForm.toLowerCase()];
    for (var j = 0; j < encodedInputs.length; j++)
        testDecode(encodedInputs[j], character);
}
| |
// Every character code from 0x00 through 0x80 inclusive.
var c = 0;
while (c <= 0x80) {
    testCharacter(c);
    c++;
}

// Selected larger codes, tested in this order.
[0x7FF, 0x800, 0x2022, 0xD7FF, 0xE000, 0xFFFC, 0xFFFD].forEach(function (code) {
    testCharacter(code);
});
| |
| // These tests are currently turned off because it's not yet entirely clear what correct behavior |
| // is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns |
| // U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same. |
| // And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater |
| // than U+FFFF (outside the BMP) from working properly. |
| |
| //testCharacter(0xD800); |
| //testCharacter(0xDBFF); |
| //testCharacter(0xDC00); |
| //testCharacter(0xDFFF); |
| //testCharacter(0xFFFE); |
| //testCharacter(0xFFFF); |
| //testCharacter(0x10000); |
| |
(function () {
    // Incomplete or malformed escape sequences. Each entry is [input, expected unescape
    // result]; unescape is expected to hand every one back unchanged except "%u0041",
    // which becomes "A". decodeURI and decodeURIComponent are expected to throw for all.
    var malformedCases = [
        ["%", "%"],
        ["%0", "%0"],
        ["%a", "%a"],
        ["%u", "%u"],
        ["%xx", "%xx"],
        ["%u004", "%u004"],
        ["%u0041", "A"],
        ["%uxxxx", "%uxxxx"]
    ];
    for (var i = 0; i < malformedCases.length; i++)
        testUnescapeAndDecode(malformedCases[i][0], malformedCases[i][1], "exception");

    var char80 = String.fromCharCode(0x80);
    var charC2 = String.fromCharCode(0xC2);
    var charD800 = String.fromCharCode(0xD800);

    // Input without any percent sign is expected to come back unchanged from all three.
    testUnescapeAndDecode(char80, char80, char80);
    testUnescapeAndDecode(charD800, charD800, charD800);

    // "%C2%80" is two characters to unescape but one to the decode functions; a "%C2"
    // that is alone or followed by a literal 0x80 is expected to make the decoders throw.
    testUnescapeAndDecode("%C2%80", charC2 + char80, char80);
    testUnescapeAndDecode("%C2", charC2, "exception");
    testUnescapeAndDecode("%C2" + char80, charC2 + char80, "exception");
})();
| |
// The characters below have to be literal because String.fromCharCode will make a single character
// and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF.
// For most JavaScript engines, this will turn into two characters because they use UTF-16
// instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16
// behavior, forbids it, or doesn't say either way.
testEscapeAndEncode("𐀀", "%uD800%uDC00", "%F0%90%80%80");
testUnescapeAndDecode("%F0%90%80%80", "\xF0\x90\x80\x80", "𐀀");
testEscapeAndEncode("𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5");
testUnescapeAndDecode("%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5", "𦏵");
// The third character is U+2FFFF. Its literal was missing from this file (the strings
// were empty), so it is spelled here as the surrogate pair D87F DFFF, which is the pair
// of code units that the expected escape() result %uD87F%uDFFF describes.
testEscapeAndEncode("\uD87F\uDFFF", "%uD87F%uDFFF", "%F0%AF%BF%BF");
testUnescapeAndDecode("%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF", "\uD87F\uDFFF");
| |
| // -------- |
| |
| // Summarize. |
| |
// Report the outcome: the number of failed checks when there were any, otherwise a
// fixed "No failures." message.
var failuresMessage = failureCount ? failureCount + " tests failed." : "No failures.";
document.writeln("<p>Testing complete. " + failuresMessage + "</p>");
| |
| // -------- |
| |
| </script> |
| |
| </body> |
| |
| </html> |