darin | a66ca87 | 2003-08-18 19:23:22 +0000 | [diff] [blame] | 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> |
| 2 | |
| 3 | <html> |
| 4 | |
| 5 | <head> |
| 6 | <meta http-equiv="content-type" content="text/html; charset=UTF-8"> |
| 7 | <title>test of JavaScript URI encoding and decoding methods</title> |
| 8 | </head> |
| 9 | |
| 10 | <body> |
| 11 | |
| 12 | <script type="text/javascript"> |
| 13 | |
// If the embedding environment exposes a testRunner object, ask it for a text dump.
if (window.testRunner) {
    testRunner.dumpAsText();
}
mjs | fb8c152 | 2005-08-30 03:46:42 +0000 | [diff] [blame] | 15 | |
darin | a66ca87 | 2003-08-18 19:23:22 +0000 | [diff] [blame] | 16 | // -------- |
| 17 | |
| 18 | // Helper functions. |
| 19 | |
// Maps a value in 0..15 to one uppercase hexadecimal digit.
// Values below 10 are returned unchanged (still a number); the callers in this
// file only ever concatenate the result onto a string.
function hexDigit(number)
{
    // 55 + 10 === 65, the character code of "A", so 10..15 become "A".."F".
    return number >= 10 ? String.fromCharCode(number + 55) : number;
}
| 26 | |
// Returns a double-quoted, ASCII-only rendering of the string s, suitable for
// inclusion in a failure message.
//
// - The empty string is reported as the words "empty string" (no quotes).
// - Backslash, double quote, newline, carriage return and tab use their usual
//   backslash escapes.
// - Printable ASCII (0x20 through 0x7E) is copied through unchanged.
// - Any other code unit up to 0xFF becomes \xNN, everything else \uNNNN.
function printable(s)
{
    if (s === "")
        return "empty string";
    var p = "";
    for (var i = 0; i < s.length; i++) {
        var c = s.charAt(i);
        var cc = s.charCodeAt(i);
        if (c === "\\") {
            p += "\\\\";
        } else if (c === "\"") {
            p += "\\\"";
        } else if (c === "\n") {
            p += "\\n";
        } else if (c === "\r") {
            p += "\\r";
        } else if (c === "\t") {
            p += "\\t";
        } else if (cc >= 0x20 && cc < 0x7F) {
            // The lower bound must be 0x20 (space). A decimal 20 here would let the
            // control characters 0x14..0x1F through unescaped.
            p += c;
        } else if (cc <= 0xFF) {
            p += "\\x" + hexDigit(cc >> 4) + hexDigit(cc & 0xF);
        } else {
            // charCodeAt() yields a UTF-16 code unit, so cc is at most 0xFFFF and
            // four hex digits are always enough.
            p += "\\u" + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
        }
    }
    return "\"" + p + "\"";
}
| 58 | |
// Returns the percent-encoding the encodeURI family is expected to produce for
// the character code c.
//
// The expected encoding is UTF-8: that is what Gecko does (WinIE 6 does not), and
// it is the form that actually works in URLs, so JavaScriptCore follows Gecko here
// while following WinIE on most other things.
function encodedCharacter(c)
{
    // A full UTF-8 encoder is not needed for this test; only these specific
    // non-ASCII code points are ever asked for.
    var utf8Cases = [
        [0x80, "%C2%80"],
        [0x7FF, "%DF%BF"],
        [0x800, "%E0%A0%80"],
        [0x2022, "%E2%80%A2"],
        [0xD7FF, "%ED%9F%BF"],
        [0xD800, "%ED%A0%80"],
        [0xE000, "%EE%80%80"],
        [0xFFFC, "%EF%BF%BC"],
        [0xFFFD, "%EF%BF%BD"]
    ];
    for (var i = 0; i < utf8Cases.length; i++) {
        if (utf8Cases[i][0] === c)
            return utf8Cases[i][1];
    }

    var outOfRange = c < 0 || c > 0x7F;
    if (!outOfRange) {
        // Plain ASCII: "%" followed by the high and low nibbles.
        return "%" + hexDigit(c >> 4) + hexDigit(c % 16);
    }

    window.alert("encodedCharacter doesn't know how to escape character code " + c);
    return "?";
}
| 85 | |
// Returns the escape sequence that escape() is expected to produce for the
// character code c.
//
// Gecko uses UTF-8 here, but the expectation follows Win IE instead: Latin1
// (%XX) for code points up to 255 and %uXXXX for everything above that.
function escapedCharacter(c)
{
    // Only these specific non-ASCII code points are ever asked for.
    var nonAsciiCases = [
        [0x80, "%80"],
        [0x7FF, "%u07FF"],
        [0x800, "%u0800"],
        [0x2022, "%u2022"],
        [0xD7FF, "%uD7FF"],
        [0xD800, "%uD800"],
        [0xE000, "%uE000"],
        [0xFFFC, "%uFFFC"],
        [0xFFFD, "%uFFFD"]
    ];
    for (var i = 0; i < nonAsciiCases.length; i++) {
        if (nonAsciiCases[i][0] === c)
            return nonAsciiCases[i][1];
    }

    var outOfRange = c < 0 || c > 0x7F;
    if (!outOfRange) {
        // Plain ASCII: "%" followed by the high and low nibbles.
        return "%" + hexDigit(c >> 4) + hexDigit(c % 16);
    }

    window.alert("escapedCharacter doesn't know how to escape character code " + c);
    return "?";
}
| 109 | |
// Calls f once per UTF-16 code unit of the string s, in order, passing the
// numeric character code as the only argument.
function forEachCharacterCode(f, s)
{
    var index = 0;
    while (index < s.length) {
        f(s.charCodeAt(index));
        index += 1;
    }
}
| 116 | |
// Invokes the global function named functionName with parameter as its single
// argument and returns whatever it returns. If the call throws, the string
// "exception" is returned instead so callers can compare against it.
function call(functionName, parameter)
{
    // Declared locally: assigning to an undeclared name would create an implicit
    // global (and throws a ReferenceError in strict-mode code).
    var result;
    try {
        // NOTE: eval is used only to resolve a function by name; the names passed
        // in this file are fixed literals. Do not feed it external input.
        result = eval(functionName + "(parameter)");
    } catch (e) {
        result = "exception";
    }
    return result;
}
| 126 | |
| 127 | // -------- |
| 128 | |
| 129 | // Build up tables with expected results. |
| 130 | |
// Overrides for the default expectations, keyed by "functionName(argument)".
var expectedResult = {};

// Records that calling the function named f on the single character with code c
// is expected to return that character unchanged.
function addExpectedNonEscaped(f, c)
{
    var character = String.fromCharCode(c);
    var key = f + "(" + character + ")";
    expectedResult[key] = character;
}
| 137 | |
// Marks the character with code c as passed through unchanged by escape,
// encodeURI and encodeURIComponent alike.
function addNoEscape(c)
{
    var functionNames = ["escape", "encodeURI", "encodeURIComponent"];
    for (var i = 0; i < functionNames.length; i++)
        addExpectedNonEscaped(functionNames[i], c);
}
| 144 | |
// Marks the character with code c as passed through unchanged by escape only.
function addEscapeNoEscape(c)
{
    var functionName = "escape";
    addExpectedNonEscaped(functionName, c);
}
| 149 | |
// Marks the character with code c as passed through unchanged by both
// encodeURI and encodeURIComponent (but not by escape).
function addURIComponentNoEscape(c)
{
    var functionNames = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < functionNames.length; i++)
        addExpectedNonEscaped(functionNames[i], c);
}
| 155 | |
// Marks the character with code c as passed through unchanged by encodeURI, and
// records that decodeURI is expected to leave its escape sequence as the encoded
// sequence, for both the upper-case and lower-case spellings.
function addURINoEscape(c)
{
    addExpectedNonEscaped("encodeURI", c);

    // First pass registers the sequences as produced, second pass lower-cased.
    var lowerCaseVariants = [false, true];
    for (var i = 0; i < lowerCaseVariants.length; i++) {
        var input = escapedCharacter(c);
        var output = encodedCharacter(c);
        if (lowerCaseVariants[i]) {
            input = input.toLowerCase();
            output = output.toLowerCase();
        }
        expectedResult["decodeURI(" + input + ")"] = output;
    }
}
| 162 | |
// Register the characters each function is expected to leave alone. Each entry
// pairs a registration helper with the characters it applies to.
(function () {
    var passThroughGroups = [
        [addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"],
        [addEscapeNoEscape, "+/"],
        [addURINoEscape, "@#$&+,/:;=?"],
        [addURIComponentNoEscape, "!'()~"]
    ];
    for (var i = 0; i < passThroughGroups.length; i++)
        forEachCharacterCode(passThroughGroups[i][0], passThroughGroups[i][1]);
})();

// WinIE 6's escape function leaves @ unescaped (Gecko's escapes it); expect the WinIE result.
expectedResult["escape(@)"] = "@";
| 170 | |
| 171 | // -------- |
| 172 | |
| 173 | // Run tests. |
| 174 | |
// Number of checks that have produced an unexpected result so far.
var failureCount = 0;

// Calls the function named functionName on parameter and compares the outcome
// with desiredResult, unless expectedResult holds an override for this exact
// call, in which case the override wins. A mismatch is written to the document
// and counted in failureCount.
function test(functionName, parameter, desiredResult)
{
    var override = expectedResult[functionName + "(" + parameter + ")"];
    var expected = override ? override : desiredResult;
    var result = call(functionName, parameter);
    if (result == expected)
        return;

    var s = "called " + functionName + " on " + printable(parameter) + " and got " + printable(result) + " instead of " + printable(expected);
    document.writeln("<p>" + s + "</p>");
    failureCount += 1;
}
| 189 | |
// Checks parameter against escape (expecting escapeExpected) and then against
// encodeURI and encodeURIComponent (both expecting encodeExpected).
function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
{
    test("escape", parameter, escapeExpected);

    var encoders = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < encoders.length; i++)
        test(encoders[i], parameter, encodeExpected);
}
| 196 | |
// Checks that unescape applied to parameter yields expected.
function testUnescape(parameter, expected)
{
    var functionName = "unescape";
    test(functionName, parameter, expected);
}
| 201 | |
// Checks that decodeURI and decodeURIComponent applied to parameter both yield
// expected. An expectation of U+FFFE or U+FFFF is replaced by U+FFFD first.
function testDecode(parameter, expected)
{
    var wanted = (expected == "\uFFFE" || expected == "\uFFFF") ? "\uFFFD" : expected;

    var decoders = ["decodeURI", "decodeURIComponent"];
    for (var i = 0; i < decoders.length; i++)
        test(decoders[i], parameter, wanted);
}
| 210 | |
// Runs the unescape check (expecting expectedUnescape) followed by the decode
// checks (expecting expectedDecode) on the same parameter.
function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
{
    var checks = [
        [testUnescape, expectedUnescape],
        [testDecode, expectedDecode]
    ];
    for (var i = 0; i < checks.length; i++) {
        var check = checks[i][0];
        check(parameter, checks[i][1]);
    }
}
| 216 | |
// Runs the full round trip for the character with code c: escaping/encoding the
// character itself, then unescaping and decoding the expected sequences in both
// their original and lower-cased spellings.
function testCharacter(c)
{
    var character = String.fromCharCode(c);
    var escapedForm = escapedCharacter(c);
    var encodedForm = encodedCharacter(c);

    testEscapeAndEncode(character, escapedForm, encodedForm);

    var unescapeInputs = [escapedForm, escapedForm.toLowerCase()];
    for (var i = 0; i < unescapeInputs.length; i++)
        testUnescape(unescapeInputs[i], character);

    var decodeInputs = [encodedForm, encodedForm.toLowerCase()];
    for (var j = 0; j < decodeInputs.length; j++)
        testDecode(decodeInputs[j], character);
}
| 229 | |
// Every code from 0 through 0x80 inclusive...
var c = 0;
while (c <= 0x80) {
    testCharacter(c);
    c += 1;
}

// ...followed by the specific higher code points that the expectation helpers know about.
(function () {
    var higherCodes = [0x7FF, 0x800, 0x2022, 0xD7FF, 0xE000, 0xFFFC, 0xFFFD];
    for (var i = 0; i < higherCodes.length; i++)
        testCharacter(higherCodes[i]);
})();
| 240 | |
| 241 | // These tests are currently turned off because it's not yet entirely clear what correct behavior |
| 242 | // is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns |
| 243 | // U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same. |
| 244 | // And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater |
| 245 | // than U+FFFF (outside the BMP) from working properly. |
| 246 | |
| 247 | //testCharacter(0xD800); |
| 248 | //testCharacter(0xDBFF); |
| 249 | //testCharacter(0xDC00); |
| 250 | //testCharacter(0xDFFF); |
| 251 | //testCharacter(0xFFFE); |
| 252 | //testCharacter(0xFFFF); |
| 253 | //testCharacter(0x10000); |
| 254 | |
// Inputs that are not well-formed escape sequences, raw non-ASCII characters, and
// complete/incomplete UTF-8 sequences. Each row is
// [input, expected unescape result, expected decodeURI/decodeURIComponent result];
// "exception" means the call is expected to throw.
(function () {
    var THROWS = "exception";
    var x80 = String.fromCharCode(0x80);
    var xC2 = String.fromCharCode(0xC2);
    var xD800 = String.fromCharCode(0xD800);

    var cases = [
        ["%", "%", THROWS],
        ["%0", "%0", THROWS],
        ["%a", "%a", THROWS],
        ["%u", "%u", THROWS],
        ["%xx", "%xx", THROWS],
        ["%u004", "%u004", THROWS],
        ["%u0041", "A", THROWS],
        ["%uxxxx", "%uxxxx", THROWS],

        [x80, x80, x80],
        [xD800, xD800, xD800],

        ["%C2%80", xC2 + x80, x80],
        ["%C2", xC2, THROWS],
        ["%C2" + x80, xC2 + x80, THROWS]
    ];

    for (var i = 0; i < cases.length; i++)
        testUnescapeAndDecode(cases[i][0], cases[i][1], cases[i][2]);
})();
darin | a66ca87 | 2003-08-18 19:23:22 +0000 | [diff] [blame] | 270 | |
| 271 | // The characters below have to be literal because String.fromCharCode will make a single character |
| 272 | // and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF. |
| 273 | // For most JavaScript engines, this will turn into two characters because they use UTF-16 |
| 274 | // instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16 |
| 275 | // behavior, forbids it, or doesn't say either way. |
mjs | 8536301 | 2004-09-14 22:14:01 +0000 | [diff] [blame] | 276 | testEscapeAndEncode("𐀀", "%uD800%uDC00", "%F0%90%80%80"); |
| 277 | testUnescapeAndDecode("%F0%90%80%80", "\xF0\x90\x80\x80", "𐀀"); |
| 278 | testEscapeAndEncode("𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5"); |
| 279 | testUnescapeAndDecode("%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5", "𦏵"); |
| 280 | testEscapeAndEncode("", "%uD87F%uDFFF", "%F0%AF%BF%BF"); |
| 281 | testUnescapeAndDecode("%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF", ""); |
darin | a66ca87 | 2003-08-18 19:23:22 +0000 | [diff] [blame] | 282 | |
| 283 | // -------- |
| 284 | |
| 285 | // Summarize. |
| 286 | |
// Report the overall outcome: either the number of failed checks or "No failures."
var failuresMessage = failureCount ? failureCount + " tests failed." : "No failures.";
document.writeln("<p>Testing complete. " + failuresMessage + "</p>");
| 294 | |
| 295 | // -------- |
| 296 | |
| 297 | </script> |
| 298 | |
| 299 | </body> |
| 300 | |
| 301 | </html> |