blob: d13daba61d24758b0a062a9695a1e1825094c46e [file] [log] [blame]
weinig@apple.com16fcb7f2011-06-04 18:55:17 +00001# Copyright (C) 2011 Apple Inc. All rights reserved.
commit-queue@webkit.org874368a2012-04-19 22:44:40 +00002# Copyright (C) 2012 Sony Network Entertainment. All rights reserved.
oliver@apple.comc3ce5432011-06-03 23:30:22 +00003#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions
6# are met:
7# 1. Redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer.
9# 2. Redistributions in binary form must reproduce the above copyright
10# notice, this list of conditions and the following disclaimer in the
11# documentation and/or other materials provided with the distribution.
12#
13# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
25import sys
26import string
27import operator
28
29keywordsText = open(sys.argv[1]).read()
30
commit-queue@webkit.org874368a2012-04-19 22:44:40 +000031# A second argument signifies that the output
32# should be redirected to a file
33redirect_to_file = len(sys.argv) > 2
34
35# Change stdout to point to the file if requested
36if redirect_to_file:
37 file_output = open(sys.argv[-1], "w")
38 sys.stdout = file_output
39
oliver@apple.comc3ce5432011-06-03 23:30:22 +000040# Observed weights of the most common keywords, rounded to 2.s.d
41keyWordWeights = {
42 "catch": 0.01,
43 "try": 0.01,
44 "while": 0.01,
45 "case": 0.01,
46 "break": 0.01,
47 "new": 0.01,
48 "in": 0.01,
49 "typeof": 0.02,
50 "true": 0.02,
51 "false": 0.02,
52 "for": 0.03,
53 "null": 0.03,
54 "else": 0.03,
55 "return": 0.13,
56 "var": 0.13,
57 "if": 0.16,
58 "function": 0.18,
59 "this": 0.18,
60}
61
62
63def allWhitespace(str):
64 for c in str:
65 if not(c in string.whitespace):
66 return False
67 return True
68
69
70def parseKeywords(keywordsText):
commit-queue@webkit.org268f0ca2013-04-08 17:51:01 +000071
72 if sys.platform == "cygwin":
73 keywordsText = keywordsText.replace("\r\n", "\n")
74
oliver@apple.comc3ce5432011-06-03 23:30:22 +000075 lines = keywordsText.split("\n")
76 lines = [line.split("#")[0] for line in lines]
77 lines = [line for line in lines if (not allWhitespace(line))]
78 name = lines[0].split()
79 terminator = lines[-1]
commit-queue@webkit.org268f0ca2013-04-08 17:51:01 +000080
oliver@apple.comc3ce5432011-06-03 23:30:22 +000081 if not name[0] == "@begin":
82 raise Exception("expected description beginning with @begin")
83 if not terminator == "@end":
84 raise Exception("expected description ending with @end")
85
86 lines = lines[1:-1] # trim off the old heading
87 return [line.split() for line in lines]
88
89
90def makePadding(size):
91 str = ""
92 for i in range(size):
93 str = str + " "
94 return str
95
96
97class Trie:
98 def __init__(self, prefix):
99 self.prefix = prefix
100 self.keys = {}
101 self.value = None
102
103 def insert(self, key, value):
104 if len(key) == 0:
105 self.value = value
106 return
107 if not (key[0] in self.keys):
108 self.keys[key[0]] = Trie(key[0])
109 self.keys[key[0]].insert(key[1:], value)
110
111 def coalesce(self):
112 keys = {}
113 for k, v in self.keys.items():
114 t = v.coalesce()
115 keys[t.prefix] = t
116 self.keys = keys
117 if self.value != None:
118 return self
119 if len(self.keys) != 1:
120 return self
commit-queue@webkit.org384cc8e2011-06-14 16:25:04 +0000121 # Python 3: for() loop for compatibility. Use next() when Python 2.6 is the baseline.
122 for (prefix, suffix) in self.keys.items():
123 res = Trie(self.prefix + prefix)
124 res.value = suffix.value
125 res.keys = suffix.keys
126 return res
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000127
128 def fillOut(self, prefix=""):
129 self.fullPrefix = prefix + self.prefix
130 weight = 0
131 if self.fullPrefix in keyWordWeights:
132 weight = weight + keyWordWeights[self.fullPrefix]
133 self.selfWeight = weight
134 for trie in self.keys.values():
135 trie.fillOut(self.fullPrefix)
136 weight = weight + trie.weight
137 self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)]
138 self.weight = weight
139
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000140 def printSubTreeAsC(self, typeName, indent):
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000141 str = makePadding(indent)
142
143 if self.value != None:
msaboff@apple.com171db662015-01-14 18:48:58 +0000144 print(str + "if (!isIdentPartIncludingEscape(code+%d, m_codeEnd)) {" % (len(self.fullPrefix)))
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000145 print(str + " internalShift<%d>();" % len(self.fullPrefix))
oliver@apple.com77099a62011-06-04 06:15:43 +0000146 print(str + " if (shouldCreateIdentifier)")
ggaren@apple.com9a9a4b52013-04-18 19:32:17 +0000147 print(str + (" data->ident = &m_vm->propertyNames->%sKeyword;" % self.fullPrefix))
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000148 print(str + " return " + self.value + ";")
149 print(str + "}")
150 rootIndex = len(self.fullPrefix)
151 itemCount = 0
152 for k, trie in self.keys:
153 baseIndex = rootIndex
154 if (baseIndex > 0) and (len(k) == 3):
155 baseIndex = baseIndex - 1
156 k = trie.fullPrefix[baseIndex] + k
157 test = [("'%s'" % c) for c in k]
158 if len(test) == 1:
159 comparison = "code[%d] == %s" % (baseIndex, test[0])
160 else:
161 base = "code"
162 if baseIndex > 0:
163 base = "code + %d" % baseIndex
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000164 comparison = ("COMPARE_%d%sS(%s, " % (len(test), typeName, base)) + ", ".join(test) + ")"
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000165 if itemCount == 0:
166 print(str + "if (" + comparison + ") {")
167 else:
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000168 print(str + "} else if (" + comparison + ") {")
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000169
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000170 trie.printSubTreeAsC(typeName, indent + 4)
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000171 itemCount = itemCount + 1
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000172
173 if itemCount == len(self.keys):
174 print(str + "}")
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000175
176 def maxLength(self):
177 max = len(self.fullPrefix)
178 for (_, trie) in self.keys:
179 l = trie.maxLength()
180 if l > max:
181 max = l
182 return max
183
184 def printAsC(self):
185 print("namespace JSC {")
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000186 print("")
msaboff@apple.com171db662015-01-14 18:48:58 +0000187 print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd);")
188 print("static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd);")
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000189 # max length + 1 so we don't need to do any bounds checking at all
190 print("static const int maxTokenLength = %d;" % (self.maxLength() + 1))
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000191 print("")
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000192 print("template <>")
193 print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseKeyword(JSTokenData* data)")
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000194 print("{")
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000195 print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);")
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000196 print("")
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000197 print(" const UChar* code = m_code;")
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000198 self.printSubTreeAsC("UCHAR", 4)
199 print(" return IDENT;")
200 print("}")
201 print("")
202 print("template <>")
203 print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseKeyword(JSTokenData* data)")
204 print("{")
205 print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);")
206 print("")
207 print(" const LChar* code = m_code;")
208 self.printSubTreeAsC("CHAR", 4)
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000209 print(" return IDENT;")
210 print("}")
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000211 print("")
212 print("} // namespace JSC")
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000213
214keywords = parseKeywords(keywordsText)
215trie = Trie("")
216for k, v in keywords:
217 trie.insert(k, v)
218trie.coalesce()
219trie.fillOut()
220print("// This file was generated by KeywordLookupGenerator.py. Do not edit.")
221print("""
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000222#if CPU(NEEDS_ALIGNED_ACCESS)
223
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000224#define COMPARE_2CHARS(address, char1, char2) \\
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000225 (((address)[0] == char1) && ((address)[1] == char2))
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000226#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
227 (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4))
228#define COMPARE_2UCHARS(address, char1, char2) \\
229 (((address)[0] == char1) && ((address)[1] == char2))
230#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
231 (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4))
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000232
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000233#else // CPU(NEEDS_ALIGNED_ACCESS)
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000234
235#if CPU(BIG_ENDIAN)
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000236
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000237#define CHARPAIR_TOUINT16(a, b) \\
238 ((((uint16_t)(a)) << 8) + (uint16_t)(b))
239#define CHARQUAD_TOUINT32(a, b, c, d) \\
240 ((((uint32_t)(CHARPAIR_TOUINT16(a, b))) << 16) + CHARPAIR_TOUINT16(c, d))
241#define UCHARPAIR_TOUINT32(a, b) \\
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000242 ((((uint32_t)(a)) << 16) + (uint32_t)(b))
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000243#define UCHARQUAD_TOUINT64(a, b, c, d) \\
244 ((((uint64_t)(UCHARQUAD_TOUINT64(a, b))) << 32) + UCHARPAIR_TOUINT32(c, d))
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000245
246#else // CPU(BIG_ENDIAN)
247
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000248#define CHARPAIR_TOUINT16(a, b) \\
249 ((((uint16_t)(b)) << 8) + (uint16_t)(a))
250#define CHARQUAD_TOUINT32(a, b, c, d) \\
251 ((((uint32_t)(CHARPAIR_TOUINT16(c, d))) << 16) + CHARPAIR_TOUINT16(a, b))
252#define UCHARPAIR_TOUINT32(a, b) \\
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000253 ((((uint32_t)(b)) << 16) + (uint32_t)(a))
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000254#define UCHARQUAD_TOUINT64(a, b, c, d) \\
255 ((((uint64_t)(UCHARPAIR_TOUINT32(c, d))) << 32) + UCHARPAIR_TOUINT32(a, b))
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000256
257#endif // CPU(BIG_ENDIAN)
258
259
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000260#define COMPARE_2CHARS(address, char1, char2) \\
zandobersek@gmail.comfbef8ed2013-08-26 18:45:21 +0000261 ((reinterpret_cast<const uint16_t*>(address))[0] == CHARPAIR_TOUINT16(char1, char2))
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000262#define COMPARE_2UCHARS(address, char1, char2) \\
zandobersek@gmail.comfbef8ed2013-08-26 18:45:21 +0000263 ((reinterpret_cast<const uint32_t*>(address))[0] == UCHARPAIR_TOUINT32(char1, char2))
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000264
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000265#if CPU(X86_64)
266
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000267#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
zandobersek@gmail.comfbef8ed2013-08-26 18:45:21 +0000268 ((reinterpret_cast<const uint32_t*>(address))[0] == CHARQUAD_TOUINT32(char1, char2, char3, char4))
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000269#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
zandobersek@gmail.comfbef8ed2013-08-26 18:45:21 +0000270 ((reinterpret_cast<const uint64_t*>(address))[0] == UCHARQUAD_TOUINT64(char1, char2, char3, char4))
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000271
272#else // CPU(X86_64)
273
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000274#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
275 (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4))
276#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
277 (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4))
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000278
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000279#endif // CPU(X86_64)
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000280
weinig@apple.com16fcb7f2011-06-04 18:55:17 +0000281#endif // CPU(NEEDS_ALIGNED_ACCESS)
282
msaboff@apple.com11c2e642011-11-07 17:54:15 +0000283#define COMPARE_3CHARS(address, char1, char2, char3) \\
284 (COMPARE_2CHARS(address, char1, char2) && ((address)[2] == (char3)))
285#define COMPARE_3UCHARS(address, char1, char2, char3) \\
286 (COMPARE_2UCHARS(address, char1, char2) && ((address)[2] == (char3)))
287#define COMPARE_5CHARS(address, char1, char2, char3, char4, char5) \\
288 (COMPARE_4CHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
289#define COMPARE_5UCHARS(address, char1, char2, char3, char4, char5) \\
290 (COMPARE_4UCHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
291#define COMPARE_6CHARS(address, char1, char2, char3, char4, char5, char6) \\
292 (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_2CHARS(address + 4, char5, char6))
293#define COMPARE_6UCHARS(address, char1, char2, char3, char4, char5, char6) \\
294 (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_2UCHARS(address + 4, char5, char6))
295#define COMPARE_7CHARS(address, char1, char2, char3, char4, char5, char6, char7) \\
296 (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 3, char4, char5, char6, char7))
297#define COMPARE_7UCHARS(address, char1, char2, char3, char4, char5, char6, char7) \\
298 (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 3, char4, char5, char6, char7))
299#define COMPARE_8CHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\
300 (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 4, char5, char6, char7, char8))
301#define COMPARE_8UCHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\
302 (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 4, char5, char6, char7, char8))
oliver@apple.comc3ce5432011-06-03 23:30:22 +0000303""")
304
305trie.printAsC()
commit-queue@webkit.org874368a2012-04-19 22:44:40 +0000306
307# Close the redirected file if requested
308if (redirect_to_file):
309 file_output.close()
310 sys.stdout = sys.__stdout__