blob: 9184e3f6da731573ba23a9302727c7af5bea3efe [file] [log] [blame]
darina66ca872003-08-18 19:23:22 +00001<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2
3<html>
4
5<head>
6<meta http-equiv="content-type" content="text/html; charset=UTF-8">
7<title>test of JavaScript URI encoding and decoding methods</title>
8</head>
9
10<body>
11
12<script type="text/javascript">
13
// When a test harness object is exposed as window.testRunner, call its dumpAsText()
// hook (presumably so the results are compared as plain text; confirm against the harness).
if (window.testRunner) {
    testRunner.dumpAsText();
}
mjsfb8c1522005-08-30 03:46:42 +000015
darina66ca872003-08-18 19:23:22 +000016// --------
17
18// Helper functions.
19
// Converts a value in the range 0-15 into one uppercase hexadecimal digit.
// Values below 10 are handed back unchanged (still a number); callers only ever
// concatenate the result onto a string. Values of 10 and above become "A", "B", ...
function hexDigit(number)
{
    // 55 + 10 is the character code of "A", so 10 maps to "A", 11 to "B", and so on.
    return number >= 10 ? String.fromCharCode(number + 55) : number;
}
26
// Returns a double-quoted, ASCII-only rendering of the string s for use in failure
// messages, or the text "empty string" when s is empty.
//
// Backslash, double quote, newline, carriage return and tab are written as the usual
// backslash escapes. Other characters outside the printable ASCII range 0x20-0x7E are
// written as \xHH (code units up to 0xFF) or \uHHHH (everything else).
//
// Fixes relative to the earlier version:
//  - The printable range started at decimal 20 instead of 0x20, so the control
//    characters 0x14-0x1F were emitted raw instead of being escaped.
//  - The \UHHHHHHHH branch was removed: charCodeAt returns a UTF-16 code unit, which
//    is never greater than 0xFFFF, so that branch could never run.
function printable(s)
{
    if (s == "")
        return "empty string";

    var HEX_DIGITS = "0123456789ABCDEF";

    // Formats value as exactly digitCount uppercase hexadecimal digits.
    function hex(value, digitCount)
    {
        var text = "";
        for (var shift = (digitCount - 1) * 4; shift >= 0; shift -= 4)
            text += HEX_DIGITS.charAt((value >> shift) & 0xF);
        return text;
    }

    var p = "";
    for (var i = 0; i < s.length; i++) {
        var c = s.charAt(i);
        var cc = s.charCodeAt(i);
        if (c == "\\") {
            p += "\\\\";
        } else if (c == "\"") {
            p += "\\\"";
        } else if (c == "\n") {
            p += "\\n";
        } else if (c == "\r") {
            p += "\\r";
        } else if (c == "\t") {
            p += "\\t";
        } else if (cc >= 0x20 && cc < 0x7F) {
            // Printable ASCII (space through tilde) is shown as-is.
            p += c;
        } else if (cc <= 0xFF) {
            p += "\\x" + hex(cc, 2);
        } else {
            p += "\\u" + hex(cc, 4);
        }
    }
    return "\"" + p + "\"";
}
58
// Returns the percent-encoded form that the encodeURI family is expected to produce
// for the character code c.
//
// UTF-8 is what Gecko does, but not what WinIE 6 does. It makes much more sense, though,
// to produce encodings that actually work in URLs. So for JavaScriptCore, we want to
// match Gecko on this, WinIE on most other things.
//
// Rather than implementing UTF-8 escaping in JavaScript, only a handful of specific
// non-ASCII codes are known; any other code outside 0-0x7F triggers an alert and
// yields "?".
function encodedCharacter(c)
{
    var knownEncodings = [
        [0x80, "%C2%80"],
        [0x7FF, "%DF%BF"],
        [0x800, "%E0%A0%80"],
        [0x2022, "%E2%80%A2"],
        [0xD7FF, "%ED%9F%BF"],
        [0xD800, "%ED%A0%80"],
        [0xE000, "%EE%80%80"],
        [0xFFFC, "%EF%BF%BC"],
        [0xFFFD, "%EF%BF%BD"]
    ];
    for (var i = 0; i < knownEncodings.length; i++) {
        // Strict comparison, so only an exact numeric match selects a table entry.
        if (knownEncodings[i][0] === c)
            return knownEncodings[i][1];
    }

    if (c < 0 || c > 0x7F) {
        window.alert("encodedCharacter doesn't know how to escape character code " + c);
        return "?";
    }

    // ASCII: "%" followed by the high and low nibbles as hexadecimal digits.
    var highNibble = c >> 4;
    var lowNibble = c - (highNibble << 4);
    return "%" + hexDigit(highNibble) + hexDigit(lowNibble);
}
85
// Returns the form that escape() is expected to produce for the character code c.
//
// UTF-8 is what Gecko does, but we do what Win IE does instead: Latin-1 style %XX for
// character codes up to 255 and %uXXXX for everything else.
//
// Only a handful of specific non-ASCII codes are known; any other code outside
// 0-0x7F triggers an alert and yields "?".
function escapedCharacter(c)
{
    var knownEscapes = [
        [0x80, "%80"],
        [0x7FF, "%u07FF"],
        [0x800, "%u0800"],
        [0x2022, "%u2022"],
        [0xD7FF, "%uD7FF"],
        [0xD800, "%uD800"],
        [0xE000, "%uE000"],
        [0xFFFC, "%uFFFC"],
        [0xFFFD, "%uFFFD"]
    ];
    for (var i = 0; i < knownEscapes.length; i++) {
        // Strict comparison, so only an exact numeric match selects a table entry.
        if (knownEscapes[i][0] === c)
            return knownEscapes[i][1];
    }

    if (c < 0 || c > 0x7F) {
        window.alert("escapedCharacter doesn't know how to escape character code " + c);
        return "?";
    }

    // ASCII: "%" followed by the high and low nibbles as hexadecimal digits.
    var highNibble = c >> 4;
    var lowNibble = c - (highNibble << 4);
    return "%" + hexDigit(highNibble) + hexDigit(lowNibble);
}
109
// Calls f once per UTF-16 code unit of the string s, in order, passing the numeric
// character code. Return values of f are ignored.
function forEachCharacterCode(f, s)
{
    var index = 0;
    while (index < s.length) {
        f(s.charCodeAt(index));
        index += 1;
    }
}
116
// Invokes the function named functionName with parameter as its single argument and
// returns the result. If the call throws, the string "exception" is returned instead,
// so callers can compare against an expected "exception" outcome.
//
// Fix: result is now a local variable. It was previously assigned without a
// declaration, which created an implicit global (and is a ReferenceError in strict mode).
function call(functionName, parameter)
{
    var result;
    try {
        // NOTE(review): eval is used to look the function up by name. This is only
        // acceptable because functionName comes from this test file's own literals;
        // never pass externally supplied text here.
        result = eval(functionName + "(parameter)");
    } catch (e) {
        result = "exception";
    }
    return result;
}
126
127// --------
128
129// Build up tables with expected results.
130
// Table of expected results that override the defaults computed by the tests.
// Keys have the form "functionName(argument)"; values are the expected return strings.
var expectedResult = {};

// Records that calling the function named f on the single character with code c
// is expected to return that character unchanged.
function addExpectedNonEscaped(f, c)
{
    var character = String.fromCharCode(c);
    expectedResult[f + "(" + character + ")"] = character;
}
137
// Records that escape, encodeURI and encodeURIComponent are all expected to leave
// the character with code c unchanged.
function addNoEscape(c)
{
    var functionNames = ["escape", "encodeURI", "encodeURIComponent"];
    for (var i = 0; i < functionNames.length; i++)
        addExpectedNonEscaped(functionNames[i], c);
}
144
// Records that escape (only) is expected to leave the character with code c unchanged.
function addEscapeNoEscape(c)
{
    addExpectedNonEscaped("escape", c);
}
149
// Records that both encodeURI and encodeURIComponent are expected to leave the
// character with code c unchanged.
function addURIComponentNoEscape(c)
{
    var functionNames = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < functionNames.length; i++)
        addExpectedNonEscaped(functionNames[i], c);
}
155
// Records the expectations for a character that encodeURI leaves alone:
//  - encodeURI returns the character unchanged, and
//  - decodeURI applied to the character's escape sequence (in upper and lower case)
//    is expected to return encodedCharacter(c) in the matching case.
function addURINoEscape(c)
{
    addExpectedNonEscaped("encodeURI", c);

    var upperCaseKey = "decodeURI(" + escapedCharacter(c) + ")";
    expectedResult[upperCaseKey] = encodedCharacter(c);

    var lowerCaseKey = "decodeURI(" + escapedCharacter(c).toLowerCase() + ")";
    expectedResult[lowerCaseKey] = encodedCharacter(c).toLowerCase();
}
162
// Fill the expected-result table: each entry pairs a recording function with the
// characters it should be applied to. Entries are processed in the order listed.
(function () {
    var groups = [
        [addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"],
        [addEscapeNoEscape, "+/"],
        [addURINoEscape, "@#$&+,/:;=?"],
        [addURIComponentNoEscape, "!'()~"]
    ];
    for (var i = 0; i < groups.length; i++)
        forEachCharacterCode(groups[i][0], groups[i][1]);
})();

// WinIE 6's escape function does not escape @, although Gecko's does.
expectedResult["escape(@)"] = "@";
170
171// --------
172
173// Run tests.
174
// Number of failed checks so far; incremented by test() and reported in the summary.
var failureCount = 0;

// Calls the function named functionName on parameter and compares the outcome with
// the expected value. An entry in expectedResult for "functionName(parameter)", when
// present and truthy, takes precedence over desiredResult. On a mismatch a paragraph
// describing the failure is written to the document and failureCount is incremented.
function test(functionName, parameter, desiredResult)
{
    var override = expectedResult[functionName + "(" + parameter + ")"];
    var expected = override ? override : desiredResult;

    var result = call(functionName, parameter);
    if (result == expected)
        return;

    var s = "called " + functionName + " on " + printable(parameter) + " and got " + printable(result) + " instead of " + printable(expected);
    document.writeln("<p>" + s + "</p>");
    failureCount += 1;
}
189
// Checks escape(parameter) against escapeExpected, then checks both encodeURI and
// encodeURIComponent on the same parameter against encodeExpected.
function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
{
    test("escape", parameter, escapeExpected);

    var encoders = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < encoders.length; i++)
        test(encoders[i], parameter, encodeExpected);
}
196
// Checks that unescape(parameter) produces expected.
function testUnescape(parameter, expected)
{
    test("unescape", parameter, expected);
}
201
// Checks that decodeURI and decodeURIComponent both turn parameter into expected.
// When expected is U+FFFE or U+FFFF, U+FFFD is expected in its place.
function testDecode(parameter, expected)
{
    var isFFFEOrFFFF = expected == "\uFFFE" || expected == "\uFFFF";
    var decodedExpected = isFFFEOrFFFF ? "\uFFFD" : expected;

    test("decodeURI", parameter, decodedExpected);
    test("decodeURIComponent", parameter, decodedExpected);
}
210
// Runs the unescape check (against expectedUnescape) and then the decodeURI /
// decodeURIComponent checks (against expectedDecode) on the same parameter.
function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
{
    testUnescape(parameter, expectedUnescape);
    testDecode(parameter, expectedDecode);
}
216
// Runs the full round trip for the single character with code c:
//  - escape / encodeURI / encodeURIComponent of the character give the expected
//    escaped and encoded forms, and
//  - unescape / decodeURI / decodeURIComponent of those forms, in both upper and
//    lower case, give the character back.
function testCharacter(c)
{
    var s = String.fromCharCode(c);
    var escaped = escapedCharacter(c);
    var encoded = encodedCharacter(c);
    var i;

    testEscapeAndEncode(s, escaped, encoded);

    var escapedForms = [escaped, escaped.toLowerCase()];
    for (i = 0; i < escapedForms.length; i++)
        testUnescape(escapedForms[i], s);

    var encodedForms = [encoded, encoded.toLowerCase()];
    for (i = 0; i < encodedForms.length; i++)
        testDecode(encodedForms[i], s);
}
229
// Every character code from 0 through 128 inclusive.
var c = 0;
while (c <= 128) {
    testCharacter(c);
    c++;
}

// The remaining non-ASCII codes that escapedCharacter and encodedCharacter know about
// (0xD800 is deliberately not exercised here).
(function () {
    var codes = [0x7FF, 0x800, 0x2022, 0xD7FF, 0xE000, 0xFFFC, 0xFFFD];
    for (var i = 0; i < codes.length; i++)
        testCharacter(codes[i]);
})();
240
241// These tests are currently turned off because it's not yet entirely clear what correct behavior
242// is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns
243// U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same.
244// And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater
245// than U+FFFF (outside the BMP) from working properly.
246
247//testCharacter(0xD800);
248//testCharacter(0xDBFF);
249//testCharacter(0xDC00);
250//testCharacter(0xDFFF);
251//testCharacter(0xFFFE);
252//testCharacter(0xFFFF);
253//testCharacter(0x10000);
254
// Each row is [input, expected unescape result, expected decodeURI/decodeURIComponent
// result]; "exception" means the call is expected to throw.
(function () {
    var charC2 = String.fromCharCode(0xC2);
    var char80 = String.fromCharCode(0x80);
    var charD800 = String.fromCharCode(0xD800);

    var cases = [
        // Incomplete or malformed escape sequences.
        ["%", "%", "exception"],
        ["%0", "%0", "exception"],
        ["%a", "%a", "exception"],
        ["%u", "%u", "exception"],
        ["%xx", "%xx", "exception"],
        ["%u004", "%u004", "exception"],
        ["%u0041", "A", "exception"],
        ["%uxxxx", "%uxxxx", "exception"],

        // Unescaped non-ASCII characters are expected to pass through untouched.
        [char80, char80, char80],
        [charD800, charD800, charD800],

        // A two-byte UTF-8 sequence: complete, truncated, and with a raw second byte.
        ["%C2%80", charC2 + char80, char80],
        ["%C2", charC2, "exception"],
        ["%C2" + char80, charC2 + char80, "exception"]
    ];

    for (var i = 0; i < cases.length; i++)
        testUnescapeAndDecode(cases[i][0], cases[i][1], cases[i][2]);
})();
darina66ca872003-08-18 19:23:22 +0000270
// The characters below have to be literal because String.fromCharCode will make a single character
// and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF.
// For most JavaScript engines, this will turn into two characters because they use UTF-16
// instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16
// behavior, forbids it, or doesn't say either way.
// Each row is [literal character, expected escape() result, expected encodeURI /
// encodeURIComponent result, expected unescape() result for that encoded form].
// For every row the encoding direction is checked first, then the decoding direction.
(function () {
    var cases = [
        ["𐀀", "%uD800%uDC00", "%F0%90%80%80", "\xF0\x90\x80\x80"],
        ["𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5"],
        ["𯿿", "%uD87F%uDFFF", "%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF"]
    ];

    for (var i = 0; i < cases.length; i++) {
        var literal = cases[i][0];
        var encoded = cases[i][2];
        testEscapeAndEncode(literal, cases[i][1], encoded);
        testUnescapeAndDecode(encoded, cases[i][3], literal);
    }
})();
darina66ca872003-08-18 19:23:22 +0000282
283// --------
284
285// Summarize.
286
// Report the overall outcome: the number of failed checks, or "No failures."
var failuresMessage = failureCount ? failureCount + " tests failed." : "No failures.";
document.writeln("<p>Testing complete. " + failuresMessage + "</p>");
294
295// --------
296
297</script>
298
299</body>
300
301</html>