Blame - LayoutTests/js/dom/encode-URI-test.html - WebKit

blob: 9184e3f6da731573ba23a9302727c7af5bea3efe [file] [log] [blame]

darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	1	<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
				2
				3	<html>
				4
				5	<head>
				6	<meta http-equiv="content-type" content="text/html; charset=UTF-8">
				7	<title>test of JavaScript URI encoding and decoding methods</title>
				8	</head>
				9
				10	<body>
				11
				12	<script type="text/javascript">
				13
// Only when the page exposes window.testRunner: call its dumpAsText() hook.
// NOTE(review): presumably this switches the harness to text-only output - confirm.
if (window.testRunner) {
    testRunner.dumpAsText();
}
mjs	fb8c152	2005-08-30 03:46:42 +0000	[diff] [blame]	15
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	16	// --------
				17
				18	// Helper functions.
				19
				20	function hexDigit(number)
				21	{
				22	if (number >= 10)
				23	return String.fromCharCode(number + 55);
				24	return number;
				25	}
				26
				27	function printable(s)
				28	{
				29	if (s == "")
				30	return "empty string";
				31	var p = "";
				32	for (var i = 0; i < s.length; i++) {
				33	var c = s.charAt(i);
				34	var cc = s.charCodeAt(i);
				35	if (c == "\\") {
				36	p += "\\\\";
				37	} else if (c == "\"") {
				38	p += "\\\"";
				39	} else if (c == "\n") {
				40	p += "\\n";
				41	} else if (c == "\r") {
				42	p += "\\r";
				43	} else if (c == "\t") {
				44	p += "\\t";
				45	} else if (cc >= 20 && cc < 0x7F) {
				46	p += c;
				47	} else if (cc <= 0xFF) {
				48	p += "\\x" + hexDigit(cc >> 4) + hexDigit(cc & 0xF);
				49	} else if (cc <= 0xFFFF) {
				50	p += "\\u" + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
				51	} else {
				52	p += "\\U" + hexDigit((cc >> 28) & 0xF) + hexDigit((cc >> 24) & 0xF) + hexDigit((cc >> 20) & 0xF) + hexDigit((cc >> 16) & 0xF)
				53	+ hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
				54	}
				55	}
				56	return "\"" + p + "\"";
				57	}
				58
// Returns the percent-encoded form expected from encodeURI and
// encodeURIComponent for the character code c.
//
// Only ASCII (0 through 0x7F) and the specific non-ASCII codes listed in the
// switch are supported. For any other code an alert is raised and "?" is
// returned.
function encodedCharacter(c)
{
    // UTF-8 is what Gecko does, but not what WinIE 6 does.
    // It makes much more sense, though, to produce encodings that actually work in URLs.
    // So for JavaScriptCore, we want to match Gecko on this, WinIE on most other things.

    // Instead of writing a JavaScript implementation of UTF-8 escaping, just do some specific cases here.
    switch (c) {
    case 0x80: return "%C2%80";
    case 0x7FF: return "%DF%BF";
    case 0x800: return "%E0%A0%80";
    case 0x2022: return "%E2%80%A2";
    case 0xD7FF: return "%ED%9F%BF";
    case 0xD800: return "%ED%A0%80";
    case 0xE000: return "%EE%80%80";
    case 0xFFFC: return "%EF%BF%BC";
    case 0xFFFD: return "%EF%BF%BD";
    }

    if (c < 0 || c > 0x7F) {
        window.alert("encodedCharacter doesn't know how to escape character code " + c);
        return "?";
    }

    // Plain ASCII: "%" followed by the high nibble and the low nibble.
    return "%" + hexDigit(c >> 4) + hexDigit(c & 0xF);
}
				85
				86	function escapedCharacter(c)
				87	{
				88	// UTF-8 is what Gecko does, but we do what Win IE does instead. We use Latin1 for
				89	// unicode characters less than or equal to 255, and %u encoding for everything else.
				90	switch (c) {
				91	case 0x80: return "%80";
				92	case 0x7FF: return "%u07FF";
				93	case 0x800: return "%u0800";
				94	case 0x2022: return "%u2022";
				95	case 0xD7FF: return "%uD7FF";
				96	case 0xD800: return "%uD800";
				97	case 0xE000: return "%uE000";
				98	case 0xFFFC: return "%uFFFC";
				99	case 0xFFFD: return "%uFFFD";
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	100	}
				101
				102	if (c < 0 \|\| c > 0x7F) {
				103	window.alert("escapedCharacter doesn't know how to escape character code " + c);
				104	return "?";
				105	}
				106
				107	return "%" + hexDigit(c >> 4) + hexDigit(c - (c >> 4 << 4));
				108	}
				109
				110	function forEachCharacterCode(f, s)
				111	{
				112	for (var i = 0; i < s.length; i++) {
				113	f(s.charCodeAt(i));
				114	}
				115	}
				116
				117	function call(functionName, parameter)
				118	{
				119	try {
				120	result = eval(functionName + "(parameter)");
				121	} catch (e) {
				122	result = "exception";
				123	}
				124	return result;
				125	}
				126
				127	// --------
				128
				129	// Build up tables with expected results.
				130
				131	var expectedResult = new Object;
				132
				133	function addExpectedNonEscaped(f, c)
				134	{
				135	expectedResult[f + "(" + String.fromCharCode(c) + ")"] = String.fromCharCode(c);
				136	}
				137
				138	function addNoEscape(c)
				139	{
				140	addExpectedNonEscaped("escape", c);
				141	addExpectedNonEscaped("encodeURI", c);
				142	addExpectedNonEscaped("encodeURIComponent", c);
				143	}
				144
				145	function addEscapeNoEscape(c)
				146	{
				147	addExpectedNonEscaped("escape", c);
				148	}
				149
				150	function addURIComponentNoEscape(c)
				151	{
				152	addExpectedNonEscaped("encodeURI", c);
				153	addExpectedNonEscaped("encodeURIComponent", c);
				154	}
				155
				156	function addURINoEscape(c)
				157	{
				158	addExpectedNonEscaped("encodeURI", c);
mjs	8536301	2004-09-14 22:14:01 +0000	[diff] [blame]	159	expectedResult["decodeURI(" + escapedCharacter(c) + ")"] = encodedCharacter(c);
				160	expectedResult["decodeURI(" + escapedCharacter(c).toLowerCase() + ")"] = encodedCharacter(c).toLowerCase();
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	161	}
				162
				163	forEachCharacterCode(addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_");
				164	forEachCharacterCode(addEscapeNoEscape, "+/");
				165	forEachCharacterCode(addURINoEscape, "@#$&+,/:;=?");
				166	forEachCharacterCode(addURIComponentNoEscape, "!'()~");
				167
				168	// WinIE 6's escape function does not escape @, although Gecko's does.
				169	expectedResult["escape(@)"] = "@";
				170
				171	// --------
				172
				173	// Run tests.
				174
				175	var failureCount = 0;
				176
				177	function test(functionName, parameter, desiredResult)
				178	{
				179	var alternateResult = expectedResult[functionName + "(" + parameter + ")"];
				180	if (alternateResult)
				181	desiredResult = alternateResult;
				182	var result = call(functionName, parameter);
				183	if (result != desiredResult) {
				184	var s = "called " + functionName + " on " + printable(parameter) + " and got " + printable(result) + " instead of " + printable(desiredResult);
darin	49bf77a	2003-09-23 06:34:09 +0000	[diff] [blame]	185	document.writeln("<p>" + s + "</p>");
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	186	failureCount += 1;
				187	}
				188	}
				189
// Checks parameter against escape (wanting escapeExpected) and then against
// encodeURI and encodeURIComponent (both wanting encodeExpected).
function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
{
    test("escape", parameter, escapeExpected);

    var encoders = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < encoders.length; i++)
        test(encoders[i], parameter, encodeExpected);
}
				196
				197	function testUnescape(parameter, expected)
				198	{
				199	test("unescape", parameter, expected);
				200	}
				201
				202	function testDecode(parameter, expected)
				203	{
				204	if (expected == "\uFFFE" \|\| expected == "\uFFFF")
				205	expected = "\uFFFD";
				206
				207	test("decodeURI", parameter, expected);
				208	test("decodeURIComponent", parameter, expected);
				209	}
				210
				211	function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
				212	{
				213	testUnescape(parameter, expectedUnescape);
				214	testDecode(parameter, expectedDecode);
				215	}
				216
				217	function testCharacter(c)
				218	{
				219	var s = String.fromCharCode(c);
				220	var escaped = escapedCharacter(c);
mjs	8536301	2004-09-14 22:14:01 +0000	[diff] [blame]	221	var encoded = encodedCharacter(c);
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	222
mjs	8536301	2004-09-14 22:14:01 +0000	[diff] [blame]	223	testEscapeAndEncode(s, escaped, encoded);
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	224	testUnescape(escaped, s);
				225	testUnescape(escaped.toLowerCase(), s);
mjs	8536301	2004-09-14 22:14:01 +0000	[diff] [blame]	226	testDecode(encoded, s);
				227	testDecode(encoded.toLowerCase(), s);
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	228	}
				229
				230	for (var c = 0; c <= 128; c++) {
				231	testCharacter(c);
				232	}
				233	testCharacter(0x7FF);
				234	testCharacter(0x800);
				235	testCharacter(0x2022);
				236	testCharacter(0xD7FF);
				237	testCharacter(0xE000);
				238	testCharacter(0xFFFC);
				239	testCharacter(0xFFFD);
				240
				241	// These tests are currently turned off because it's not yet entirely clear what correct behavior
				242	// is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns
				243	// U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same.
				244	// And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater
				245	// than U+FFFF (outside the BMP) from working properly.
				246
				247	//testCharacter(0xD800);
				248	//testCharacter(0xDBFF);
				249	//testCharacter(0xDC00);
				250	//testCharacter(0xDFFF);
				251	//testCharacter(0xFFFE);
				252	//testCharacter(0xFFFF);
				253	//testCharacter(0x10000);
				254
				255	testUnescapeAndDecode("%", "%", "exception");
				256	testUnescapeAndDecode("%0", "%0", "exception");
				257	testUnescapeAndDecode("%a", "%a", "exception");
				258	testUnescapeAndDecode("%u", "%u", "exception");
				259	testUnescapeAndDecode("%xx", "%xx", "exception");
				260	testUnescapeAndDecode("%u004", "%u004", "exception");
				261	testUnescapeAndDecode("%u0041", "A", "exception");
				262	testUnescapeAndDecode("%uxxxx", "%uxxxx", "exception");
				263
				264	testUnescapeAndDecode(String.fromCharCode(0x80), String.fromCharCode(0x80), String.fromCharCode(0x80));
				265	testUnescapeAndDecode(String.fromCharCode(0xD800), String.fromCharCode(0xD800), String.fromCharCode(0xD800));
				266
mjs	8536301	2004-09-14 22:14:01 +0000	[diff] [blame]	267	testUnescapeAndDecode("%C2%80", String.fromCharCode(0xC2) + String.fromCharCode(0x80), String.fromCharCode(0x80));
				268	testUnescapeAndDecode("%C2", String.fromCharCode(0xC2), "exception");
				269	testUnescapeAndDecode("%C2" + String.fromCharCode(0x80), String.fromCharCode(0xC2) + String.fromCharCode(0x80), "exception");
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	270
				271	// The characters below have to be literal because String.fromCharCode will make a single character
				272	// and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF.
				273	// For most JavaScript engines, this will turn into two characters because they use UTF-16
				274	// instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16
				275	// behavior, forbids it, or doesn't say either way.
// Each pair checks one literal character above U+FFFF in both directions:
// first escaping/encoding it, then unescaping/decoding its four-byte form.
testEscapeAndEncode("𐀀", "%uD800%uDC00", "%F0%90%80%80");
testUnescapeAndDecode("%F0%90%80%80", "\xF0\x90\x80\x80", "𐀀");

testEscapeAndEncode("𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5");
testUnescapeAndDecode("%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5", "𦏵");

testEscapeAndEncode("𯿿", "%uD87F%uDFFF", "%F0%AF%BF%BF");
testUnescapeAndDecode("%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF", "𯿿");
darin	a66ca87	2003-08-18 19:23:22 +0000	[diff] [blame]	282
				283	// --------
				284
				285	// Summarize.
				286
				287	var failuresMessage;
				288	if (failureCount) {
				289	failuresMessage = failureCount + " tests failed.";
				290	} else {
				291	failuresMessage = "No failures.";
				292	}
				293	document.writeln("<p>Testing complete. " + failuresMessage + "</p>");
				294
				295	// --------
				296
				297	</script>
				298
				299	</body>
				300
				301	</html>