Keyword lookup can use memcmp to get around unaligned load undefined behavior
https://bugs.webkit.org/show_bug.cgi?id=199650
Patch by Saagar Jha <saagarjha@apple.com> on 2019-07-15
Reviewed by Yusuke Suzuki.
Replace KeywordLookup's hand-rolled "memcmp" with the standard version, which reduces the need to deal with
endianness and unaligned loads.
* KeywordLookupGenerator.py:
(Trie.printSubTreeAsC): Use memcmp instead of macros to test for matches.
(Trie.printAsC): Unspecialize Lexer::parseKeyword as templating over the character type reduces the amount of
code we need to generate and moves this task out of the Python script and into the C++ compiler.
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@247463 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog
index 5f477ee..88e9523 100644
--- a/Source/JavaScriptCore/ChangeLog
+++ b/Source/JavaScriptCore/ChangeLog
@@ -1,3 +1,18 @@
+2019-07-15 Saagar Jha <saagarjha@apple.com>
+
+ Keyword lookup can use memcmp to get around unaligned load undefined behavior
+ https://bugs.webkit.org/show_bug.cgi?id=199650
+
+ Reviewed by Yusuke Suzuki.
+
+ Replace KeywordLookup's hand-rolled "memcmp" with the standard version, which reduces the need to deal with
+ endianness and unaligned loads.
+
+ * KeywordLookupGenerator.py:
+ (Trie.printSubTreeAsC): Use memcmp instead of macros to test for matches.
+ (Trie.printAsC): Unspecialize Lexer::parseKeyword as templating over the character type reduces the amount of
+ code we need to generate and moves this task out of the Python script and into the C++ compiler.
+
2019-07-15 Yusuke Suzuki <ysuzuki@apple.com>
[JSC] Improve wasm wpt test results by fixing miscellaneous issues
diff --git a/Source/JavaScriptCore/KeywordLookupGenerator.py b/Source/JavaScriptCore/KeywordLookupGenerator.py
index d13daba..f640d5a 100644
--- a/Source/JavaScriptCore/KeywordLookupGenerator.py
+++ b/Source/JavaScriptCore/KeywordLookupGenerator.py
@@ -137,11 +137,11 @@
self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)]
self.weight = weight
- def printSubTreeAsC(self, typeName, indent):
+ def printSubTreeAsC(self, indent):
str = makePadding(indent)
if self.value != None:
- print(str + "if (!isIdentPartIncludingEscape(code+%d, m_codeEnd)) {" % (len(self.fullPrefix)))
+ print(str + "if (!isIdentPartIncludingEscape(code + %d, m_codeEnd)) {" % (len(self.fullPrefix)))
print(str + " internalShift<%d>();" % len(self.fullPrefix))
print(str + " if (shouldCreateIdentifier)")
print(str + (" data->ident = &m_vm->propertyNames->%sKeyword;" % self.fullPrefix))
@@ -155,19 +155,16 @@
baseIndex = baseIndex - 1
k = trie.fullPrefix[baseIndex] + k
test = [("'%s'" % c) for c in k]
- if len(test) == 1:
- comparison = "code[%d] == %s" % (baseIndex, test[0])
- else:
- base = "code"
- if baseIndex > 0:
- base = "code + %d" % baseIndex
- comparison = ("COMPARE_%d%sS(%s, " % (len(test), typeName, base)) + ", ".join(test) + ")"
+ base = "code + %d" % baseIndex
+ length = __builtins__.str(len(test))
+ needle = "(std::array<Char, " + length + ">{{" + ", ".join(test) + "}}).data()"
+ comparison = ("!memcmp(%s, " % (base)) + needle + ", " + length + " * sizeof(Char))"
if itemCount == 0:
print(str + "if (" + comparison + ") {")
else:
print(str + "} else if (" + comparison + ") {")
- trie.printSubTreeAsC(typeName, indent + 4)
+ trie.printSubTreeAsC(indent + 4)
itemCount = itemCount + 1
if itemCount == len(self.keys):
@@ -189,23 +186,13 @@
# max length + 1 so we don't need to do any bounds checking at all
print("static const int maxTokenLength = %d;" % (self.maxLength() + 1))
print("")
- print("template <>")
- print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseKeyword(JSTokenData* data)")
+ print("template <typename Char>")
+ print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<Char>::parseKeyword(JSTokenData* data)")
print("{")
print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);")
print("")
- print(" const UChar* code = m_code;")
- self.printSubTreeAsC("UCHAR", 4)
- print(" return IDENT;")
- print("}")
- print("")
- print("template <>")
- print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseKeyword(JSTokenData* data)")
- print("{")
- print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);")
- print("")
- print(" const LChar* code = m_code;")
- self.printSubTreeAsC("CHAR", 4)
+ print(" const Char* code = m_code;")
+ self.printSubTreeAsC(4)
print(" return IDENT;")
print("}")
print("")
@@ -218,89 +205,6 @@
trie.coalesce()
trie.fillOut()
print("// This file was generated by KeywordLookupGenerator.py. Do not edit.")
-print("""
-#if CPU(NEEDS_ALIGNED_ACCESS)
-
-#define COMPARE_2CHARS(address, char1, char2) \\
- (((address)[0] == char1) && ((address)[1] == char2))
-#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
- (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4))
-#define COMPARE_2UCHARS(address, char1, char2) \\
- (((address)[0] == char1) && ((address)[1] == char2))
-#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
- (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4))
-
-#else // CPU(NEEDS_ALIGNED_ACCESS)
-
-#if CPU(BIG_ENDIAN)
-
-#define CHARPAIR_TOUINT16(a, b) \\
- ((((uint16_t)(a)) << 8) + (uint16_t)(b))
-#define CHARQUAD_TOUINT32(a, b, c, d) \\
- ((((uint32_t)(CHARPAIR_TOUINT16(a, b))) << 16) + CHARPAIR_TOUINT16(c, d))
-#define UCHARPAIR_TOUINT32(a, b) \\
- ((((uint32_t)(a)) << 16) + (uint32_t)(b))
-#define UCHARQUAD_TOUINT64(a, b, c, d) \\
- ((((uint64_t)(UCHARQUAD_TOUINT64(a, b))) << 32) + UCHARPAIR_TOUINT32(c, d))
-
-#else // CPU(BIG_ENDIAN)
-
-#define CHARPAIR_TOUINT16(a, b) \\
- ((((uint16_t)(b)) << 8) + (uint16_t)(a))
-#define CHARQUAD_TOUINT32(a, b, c, d) \\
- ((((uint32_t)(CHARPAIR_TOUINT16(c, d))) << 16) + CHARPAIR_TOUINT16(a, b))
-#define UCHARPAIR_TOUINT32(a, b) \\
- ((((uint32_t)(b)) << 16) + (uint32_t)(a))
-#define UCHARQUAD_TOUINT64(a, b, c, d) \\
- ((((uint64_t)(UCHARPAIR_TOUINT32(c, d))) << 32) + UCHARPAIR_TOUINT32(a, b))
-
-#endif // CPU(BIG_ENDIAN)
-
-
-#define COMPARE_2CHARS(address, char1, char2) \\
- ((reinterpret_cast<const uint16_t*>(address))[0] == CHARPAIR_TOUINT16(char1, char2))
-#define COMPARE_2UCHARS(address, char1, char2) \\
- ((reinterpret_cast<const uint32_t*>(address))[0] == UCHARPAIR_TOUINT32(char1, char2))
-
-#if CPU(X86_64)
-
-#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
- ((reinterpret_cast<const uint32_t*>(address))[0] == CHARQUAD_TOUINT32(char1, char2, char3, char4))
-#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
- ((reinterpret_cast<const uint64_t*>(address))[0] == UCHARQUAD_TOUINT64(char1, char2, char3, char4))
-
-#else // CPU(X86_64)
-
-#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
- (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4))
-#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
- (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4))
-
-#endif // CPU(X86_64)
-
-#endif // CPU(NEEDS_ALIGNED_ACCESS)
-
-#define COMPARE_3CHARS(address, char1, char2, char3) \\
- (COMPARE_2CHARS(address, char1, char2) && ((address)[2] == (char3)))
-#define COMPARE_3UCHARS(address, char1, char2, char3) \\
- (COMPARE_2UCHARS(address, char1, char2) && ((address)[2] == (char3)))
-#define COMPARE_5CHARS(address, char1, char2, char3, char4, char5) \\
- (COMPARE_4CHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
-#define COMPARE_5UCHARS(address, char1, char2, char3, char4, char5) \\
- (COMPARE_4UCHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
-#define COMPARE_6CHARS(address, char1, char2, char3, char4, char5, char6) \\
- (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_2CHARS(address + 4, char5, char6))
-#define COMPARE_6UCHARS(address, char1, char2, char3, char4, char5, char6) \\
- (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_2UCHARS(address + 4, char5, char6))
-#define COMPARE_7CHARS(address, char1, char2, char3, char4, char5, char6, char7) \\
- (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 3, char4, char5, char6, char7))
-#define COMPARE_7UCHARS(address, char1, char2, char3, char4, char5, char6, char7) \\
- (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 3, char4, char5, char6, char7))
-#define COMPARE_8CHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\
- (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 4, char5, char6, char7, char8))
-#define COMPARE_8UCHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\
- (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 4, char5, char6, char7, char8))
-""")
trie.printAsC()