| /* |
| * Copyright (C) 2009-2019 Apple Inc. All rights reserved. |
| * Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be) |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
#include "config.h"
#include "LiteralParser.h"

#include "CodeBlock.h"
#include "JSArray.h"
#include "JSCInlines.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include <limits>
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringConcatenate.h>
| |
| namespace JSC { |
| |
// Returns true when `c` is one of the four characters the JSON grammar treats as
// insignificant white space: space, horizontal tab, line feed, or carriage return
// (RFC 4627, section 2 "JSON Grammar": http://www.ietf.org/rfc/rfc4627.txt).
template <typename CharType>
static ALWAYS_INLINE bool isJSONWhiteSpace(const CharType& c)
{
    switch (c) {
    case ' ':
    case '\t': // 0x9
    case '\n': // 0xA
    case '\r': // 0xD
        return true;
    default:
        return false;
    }
}
| |
| template <typename CharType> |
| bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo) |
| { |
| VM& vm = m_globalObject->vm(); |
| auto scope = DECLARE_THROW_SCOPE(vm); |
| if (m_lexer.next() != TokIdentifier) |
| return false; |
| do { |
| Vector<JSONPPathEntry> path; |
| // Unguarded next to start off the lexer |
| Identifier name = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start); |
| JSONPPathEntry entry; |
| if (name == vm.propertyNames->varKeyword) { |
| if (m_lexer.next() != TokIdentifier) |
| return false; |
| entry.m_type = JSONPPathEntryTypeDeclareVar; |
| entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start); |
| path.append(entry); |
| } else { |
| entry.m_type = JSONPPathEntryTypeDot; |
| entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start); |
| path.append(entry); |
| } |
| if (isLexerKeyword(entry.m_pathEntryName)) |
| return false; |
| TokenType tokenType = m_lexer.next(); |
| if (entry.m_type == JSONPPathEntryTypeDeclareVar && tokenType != TokAssign) |
| return false; |
| while (tokenType != TokAssign) { |
| switch (tokenType) { |
| case TokLBracket: { |
| entry.m_type = JSONPPathEntryTypeLookup; |
| if (m_lexer.next() != TokNumber) |
| return false; |
| double doubleIndex = m_lexer.currentToken()->numberToken; |
| int index = (int)doubleIndex; |
| if (index != doubleIndex || index < 0) |
| return false; |
| entry.m_pathIndex = index; |
| if (m_lexer.next() != TokRBracket) |
| return false; |
| break; |
| } |
| case TokDot: { |
| entry.m_type = JSONPPathEntryTypeDot; |
| if (m_lexer.next() != TokIdentifier) |
| return false; |
| entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start); |
| break; |
| } |
| case TokLParen: { |
| if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo) |
| return false; |
| path.last().m_type = JSONPPathEntryTypeCall; |
| entry = path.last(); |
| goto startJSON; |
| } |
| default: |
| return false; |
| } |
| path.append(entry); |
| tokenType = m_lexer.next(); |
| } |
| startJSON: |
| m_lexer.next(); |
| results.append(JSONPData()); |
| JSValue startParseExpressionValue = parse(StartParseExpression); |
| RETURN_IF_EXCEPTION(scope, false); |
| results.last().m_value.set(vm, startParseExpressionValue); |
| if (!results.last().m_value) |
| return false; |
| results.last().m_path.swap(path); |
| if (entry.m_type == JSONPPathEntryTypeCall) { |
| if (m_lexer.currentToken()->type != TokRParen) |
| return false; |
| m_lexer.next(); |
| } |
| if (m_lexer.currentToken()->type != TokSemi) |
| break; |
| m_lexer.next(); |
| } while (m_lexer.currentToken()->type == TokIdentifier); |
| return m_lexer.currentToken()->type == TokEnd; |
| } |
| |
| template <typename CharType> |
| ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length) |
| { |
| VM& vm = m_globalObject->vm(); |
| if (!length) |
| return vm.propertyNames->emptyIdentifier; |
| if (characters[0] >= MaximumCachableCharacter) |
| return Identifier::fromString(vm, characters, length); |
| |
| if (length == 1) { |
| if (!m_shortIdentifiers[characters[0]].isNull()) |
| return m_shortIdentifiers[characters[0]]; |
| m_shortIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length); |
| return m_shortIdentifiers[characters[0]]; |
| } |
| if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length)) |
| return m_recentIdentifiers[characters[0]]; |
| m_recentIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length); |
| return m_recentIdentifiers[characters[0]]; |
| } |
| |
| template <typename CharType> |
| ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length) |
| { |
| VM& vm = m_globalObject->vm(); |
| if (!length) |
| return vm.propertyNames->emptyIdentifier; |
| if (characters[0] >= MaximumCachableCharacter) |
| return Identifier::fromString(vm, characters, length); |
| |
| if (length == 1) { |
| if (!m_shortIdentifiers[characters[0]].isNull()) |
| return m_shortIdentifiers[characters[0]]; |
| m_shortIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length); |
| return m_shortIdentifiers[characters[0]]; |
| } |
| if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length)) |
| return m_recentIdentifiers[characters[0]]; |
| m_recentIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length); |
| return m_recentIdentifiers[characters[0]]; |
| } |
| |
| // 256 Latin-1 codes |
| static constexpr const TokenType TokenTypesOfLatin1Characters[256] = { |
| /* 0 - Null */ TokError, |
| /* 1 - Start of Heading */ TokError, |
| /* 2 - Start of Text */ TokError, |
| /* 3 - End of Text */ TokError, |
| /* 4 - End of Transm. */ TokError, |
| /* 5 - Enquiry */ TokError, |
| /* 6 - Acknowledgment */ TokError, |
| /* 7 - Bell */ TokError, |
| /* 8 - Back Space */ TokError, |
| /* 9 - Horizontal Tab */ TokError, |
| /* 10 - Line Feed */ TokError, |
| /* 11 - Vertical Tab */ TokError, |
| /* 12 - Form Feed */ TokError, |
| /* 13 - Carriage Return */ TokError, |
| /* 14 - Shift Out */ TokError, |
| /* 15 - Shift In */ TokError, |
| /* 16 - Data Line Escape */ TokError, |
| /* 17 - Device Control 1 */ TokError, |
| /* 18 - Device Control 2 */ TokError, |
| /* 19 - Device Control 3 */ TokError, |
| /* 20 - Device Control 4 */ TokError, |
| /* 21 - Negative Ack. */ TokError, |
| /* 22 - Synchronous Idle */ TokError, |
| /* 23 - End of Transmit */ TokError, |
| /* 24 - Cancel */ TokError, |
| /* 25 - End of Medium */ TokError, |
| /* 26 - Substitute */ TokError, |
| /* 27 - Escape */ TokError, |
| /* 28 - File Separator */ TokError, |
| /* 29 - Group Separator */ TokError, |
| /* 30 - Record Separator */ TokError, |
| /* 31 - Unit Separator */ TokError, |
| /* 32 - Space */ TokError, |
| /* 33 - ! */ TokError, |
| /* 34 - " */ TokString, |
| /* 35 - # */ TokError, |
| /* 36 - $ */ TokIdentifier, |
| /* 37 - % */ TokError, |
| /* 38 - & */ TokError, |
| /* 39 - ' */ TokString, |
| /* 40 - ( */ TokLParen, |
| /* 41 - ) */ TokRParen, |
| /* 42 - * */ TokError, |
| /* 43 - + */ TokError, |
| /* 44 - , */ TokComma, |
| /* 45 - - */ TokNumber, |
| /* 46 - . */ TokDot, |
| /* 47 - / */ TokError, |
| /* 48 - 0 */ TokNumber, |
| /* 49 - 1 */ TokNumber, |
| /* 50 - 2 */ TokNumber, |
| /* 51 - 3 */ TokNumber, |
| /* 52 - 4 */ TokNumber, |
| /* 53 - 5 */ TokNumber, |
| /* 54 - 6 */ TokNumber, |
| /* 55 - 7 */ TokNumber, |
| /* 56 - 8 */ TokNumber, |
| /* 57 - 9 */ TokNumber, |
| /* 58 - : */ TokColon, |
| /* 59 - ; */ TokSemi, |
| /* 60 - < */ TokError, |
| /* 61 - = */ TokAssign, |
| /* 62 - > */ TokError, |
| /* 63 - ? */ TokError, |
| /* 64 - @ */ TokError, |
| /* 65 - A */ TokIdentifier, |
| /* 66 - B */ TokIdentifier, |
| /* 67 - C */ TokIdentifier, |
| /* 68 - D */ TokIdentifier, |
| /* 69 - E */ TokIdentifier, |
| /* 70 - F */ TokIdentifier, |
| /* 71 - G */ TokIdentifier, |
| /* 72 - H */ TokIdentifier, |
| /* 73 - I */ TokIdentifier, |
| /* 74 - J */ TokIdentifier, |
| /* 75 - K */ TokIdentifier, |
| /* 76 - L */ TokIdentifier, |
| /* 77 - M */ TokIdentifier, |
| /* 78 - N */ TokIdentifier, |
| /* 79 - O */ TokIdentifier, |
| /* 80 - P */ TokIdentifier, |
| /* 81 - Q */ TokIdentifier, |
| /* 82 - R */ TokIdentifier, |
| /* 83 - S */ TokIdentifier, |
| /* 84 - T */ TokIdentifier, |
| /* 85 - U */ TokIdentifier, |
| /* 86 - V */ TokIdentifier, |
| /* 87 - W */ TokIdentifier, |
| /* 88 - X */ TokIdentifier, |
| /* 89 - Y */ TokIdentifier, |
| /* 90 - Z */ TokIdentifier, |
| /* 91 - [ */ TokLBracket, |
| /* 92 - \ */ TokError, |
| /* 93 - ] */ TokRBracket, |
| /* 94 - ^ */ TokError, |
| /* 95 - _ */ TokIdentifier, |
| /* 96 - ` */ TokError, |
| /* 97 - a */ TokIdentifier, |
| /* 98 - b */ TokIdentifier, |
| /* 99 - c */ TokIdentifier, |
| /* 100 - d */ TokIdentifier, |
| /* 101 - e */ TokIdentifier, |
| /* 102 - f */ TokIdentifier, |
| /* 103 - g */ TokIdentifier, |
| /* 104 - h */ TokIdentifier, |
| /* 105 - i */ TokIdentifier, |
| /* 106 - j */ TokIdentifier, |
| /* 107 - k */ TokIdentifier, |
| /* 108 - l */ TokIdentifier, |
| /* 109 - m */ TokIdentifier, |
| /* 110 - n */ TokIdentifier, |
| /* 111 - o */ TokIdentifier, |
| /* 112 - p */ TokIdentifier, |
| /* 113 - q */ TokIdentifier, |
| /* 114 - r */ TokIdentifier, |
| /* 115 - s */ TokIdentifier, |
| /* 116 - t */ TokIdentifier, |
| /* 117 - u */ TokIdentifier, |
| /* 118 - v */ TokIdentifier, |
| /* 119 - w */ TokIdentifier, |
| /* 120 - x */ TokIdentifier, |
| /* 121 - y */ TokIdentifier, |
| /* 122 - z */ TokIdentifier, |
| /* 123 - { */ TokLBrace, |
| /* 124 - | */ TokError, |
| /* 125 - } */ TokRBrace, |
| /* 126 - ~ */ TokError, |
| /* 127 - Delete */ TokError, |
| /* 128 - Cc category */ TokError, |
| /* 129 - Cc category */ TokError, |
| /* 130 - Cc category */ TokError, |
| /* 131 - Cc category */ TokError, |
| /* 132 - Cc category */ TokError, |
| /* 133 - Cc category */ TokError, |
| /* 134 - Cc category */ TokError, |
| /* 135 - Cc category */ TokError, |
| /* 136 - Cc category */ TokError, |
| /* 137 - Cc category */ TokError, |
| /* 138 - Cc category */ TokError, |
| /* 139 - Cc category */ TokError, |
| /* 140 - Cc category */ TokError, |
| /* 141 - Cc category */ TokError, |
| /* 142 - Cc category */ TokError, |
| /* 143 - Cc category */ TokError, |
| /* 144 - Cc category */ TokError, |
| /* 145 - Cc category */ TokError, |
| /* 146 - Cc category */ TokError, |
| /* 147 - Cc category */ TokError, |
| /* 148 - Cc category */ TokError, |
| /* 149 - Cc category */ TokError, |
| /* 150 - Cc category */ TokError, |
| /* 151 - Cc category */ TokError, |
| /* 152 - Cc category */ TokError, |
| /* 153 - Cc category */ TokError, |
| /* 154 - Cc category */ TokError, |
| /* 155 - Cc category */ TokError, |
| /* 156 - Cc category */ TokError, |
| /* 157 - Cc category */ TokError, |
| /* 158 - Cc category */ TokError, |
| /* 159 - Cc category */ TokError, |
| /* 160 - Zs category (nbsp) */ TokError, |
| /* 161 - Po category */ TokError, |
| /* 162 - Sc category */ TokError, |
| /* 163 - Sc category */ TokError, |
| /* 164 - Sc category */ TokError, |
| /* 165 - Sc category */ TokError, |
| /* 166 - So category */ TokError, |
| /* 167 - So category */ TokError, |
| /* 168 - Sk category */ TokError, |
| /* 169 - So category */ TokError, |
| /* 170 - Ll category */ TokError, |
| /* 171 - Pi category */ TokError, |
| /* 172 - Sm category */ TokError, |
| /* 173 - Cf category */ TokError, |
| /* 174 - So category */ TokError, |
| /* 175 - Sk category */ TokError, |
| /* 176 - So category */ TokError, |
| /* 177 - Sm category */ TokError, |
| /* 178 - No category */ TokError, |
| /* 179 - No category */ TokError, |
| /* 180 - Sk category */ TokError, |
| /* 181 - Ll category */ TokError, |
| /* 182 - So category */ TokError, |
| /* 183 - Po category */ TokError, |
| /* 184 - Sk category */ TokError, |
| /* 185 - No category */ TokError, |
| /* 186 - Ll category */ TokError, |
| /* 187 - Pf category */ TokError, |
| /* 188 - No category */ TokError, |
| /* 189 - No category */ TokError, |
| /* 190 - No category */ TokError, |
| /* 191 - Po category */ TokError, |
| /* 192 - Lu category */ TokError, |
| /* 193 - Lu category */ TokError, |
| /* 194 - Lu category */ TokError, |
| /* 195 - Lu category */ TokError, |
| /* 196 - Lu category */ TokError, |
| /* 197 - Lu category */ TokError, |
| /* 198 - Lu category */ TokError, |
| /* 199 - Lu category */ TokError, |
| /* 200 - Lu category */ TokError, |
| /* 201 - Lu category */ TokError, |
| /* 202 - Lu category */ TokError, |
| /* 203 - Lu category */ TokError, |
| /* 204 - Lu category */ TokError, |
| /* 205 - Lu category */ TokError, |
| /* 206 - Lu category */ TokError, |
| /* 207 - Lu category */ TokError, |
| /* 208 - Lu category */ TokError, |
| /* 209 - Lu category */ TokError, |
| /* 210 - Lu category */ TokError, |
| /* 211 - Lu category */ TokError, |
| /* 212 - Lu category */ TokError, |
| /* 213 - Lu category */ TokError, |
| /* 214 - Lu category */ TokError, |
| /* 215 - Sm category */ TokError, |
| /* 216 - Lu category */ TokError, |
| /* 217 - Lu category */ TokError, |
| /* 218 - Lu category */ TokError, |
| /* 219 - Lu category */ TokError, |
| /* 220 - Lu category */ TokError, |
| /* 221 - Lu category */ TokError, |
| /* 222 - Lu category */ TokError, |
| /* 223 - Ll category */ TokError, |
| /* 224 - Ll category */ TokError, |
| /* 225 - Ll category */ TokError, |
| /* 226 - Ll category */ TokError, |
| /* 227 - Ll category */ TokError, |
| /* 228 - Ll category */ TokError, |
| /* 229 - Ll category */ TokError, |
| /* 230 - Ll category */ TokError, |
| /* 231 - Ll category */ TokError, |
| /* 232 - Ll category */ TokError, |
| /* 233 - Ll category */ TokError, |
| /* 234 - Ll category */ TokError, |
| /* 235 - Ll category */ TokError, |
| /* 236 - Ll category */ TokError, |
| /* 237 - Ll category */ TokError, |
| /* 238 - Ll category */ TokError, |
| /* 239 - Ll category */ TokError, |
| /* 240 - Ll category */ TokError, |
| /* 241 - Ll category */ TokError, |
| /* 242 - Ll category */ TokError, |
| /* 243 - Ll category */ TokError, |
| /* 244 - Ll category */ TokError, |
| /* 245 - Ll category */ TokError, |
| /* 246 - Ll category */ TokError, |
| /* 247 - Sm category */ TokError, |
| /* 248 - Ll category */ TokError, |
| /* 249 - Ll category */ TokError, |
| /* 250 - Ll category */ TokError, |
| /* 251 - Ll category */ TokError, |
| /* 252 - Ll category */ TokError, |
| /* 253 - Ll category */ TokError, |
| /* 254 - Ll category */ TokError, |
| /* 255 - Ll category */ TokError |
| }; |
| |
| template <typename CharType> |
| ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token) |
| { |
| #if ASSERT_ENABLED |
| m_currentTokenID++; |
| #endif |
| |
| while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr)) |
| ++m_ptr; |
| |
| ASSERT(m_ptr <= m_end); |
| if (m_ptr == m_end) { |
| token.type = TokEnd; |
| token.start = token.end = m_ptr; |
| return TokEnd; |
| } |
| ASSERT(m_ptr < m_end); |
| token.type = TokError; |
| token.start = m_ptr; |
| CharType character = *m_ptr; |
| if (LIKELY(isLatin1(character))) { |
| TokenType tokenType = TokenTypesOfLatin1Characters[character]; |
| switch (tokenType) { |
| case TokString: |
| if (character == '\'' && m_mode == StrictJSON) { |
| m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s; |
| return TokError; |
| } |
| return lexString(token, character); |
| |
| case TokIdentifier: { |
| switch (character) { |
| case 't': |
| if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { |
| m_ptr += 4; |
| token.type = TokTrue; |
| token.end = m_ptr; |
| return TokTrue; |
| } |
| break; |
| case 'f': |
| if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') { |
| m_ptr += 5; |
| token.type = TokFalse; |
| token.end = m_ptr; |
| return TokFalse; |
| } |
| break; |
| case 'n': |
| if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') { |
| m_ptr += 4; |
| token.type = TokNull; |
| token.end = m_ptr; |
| return TokNull; |
| } |
| break; |
| } |
| return lexIdentifier(token); |
| } |
| |
| case TokNumber: |
| return lexNumber(token); |
| |
| case TokError: |
| break; |
| |
| default: |
| ASSERT(tokenType == TokLBracket |
| || tokenType == TokRBracket |
| || tokenType == TokLBrace |
| || tokenType == TokRBrace |
| || tokenType == TokColon |
| || tokenType == TokLParen |
| || tokenType == TokRParen |
| || tokenType == TokComma |
| || tokenType == TokDot |
| || tokenType == TokAssign |
| || tokenType == TokSemi); |
| token.type = tokenType; |
| token.end = ++m_ptr; |
| return tokenType; |
| } |
| } |
| m_lexErrorMessage = makeString("Unrecognized token '", StringView { m_ptr, 1 }, '\''); |
| return TokError; |
| } |
| |
| template <> |
| ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token) |
| { |
| while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$')) |
| m_ptr++; |
| token.stringIs8Bit = 1; |
| token.stringToken8 = token.start; |
| token.stringLength = m_ptr - token.start; |
| token.type = TokIdentifier; |
| token.end = m_ptr; |
| return TokIdentifier; |
| } |
| |
| template <> |
| ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token) |
| { |
| while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$' || *m_ptr == 0x200C || *m_ptr == 0x200D)) |
| m_ptr++; |
| token.stringIs8Bit = 0; |
| token.stringToken16 = token.start; |
| token.stringLength = m_ptr - token.start; |
| token.type = TokIdentifier; |
| token.end = m_ptr; |
| return TokIdentifier; |
| } |
| |
| template <typename CharType> |
| TokenType LiteralParser<CharType>::Lexer::next() |
| { |
| TokenType result = lex(m_currentToken); |
| ASSERT(m_currentToken.type == result); |
| return result; |
| } |
| |
| template <> |
| ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string) |
| { |
| token.stringIs8Bit = 1; |
| token.stringToken8 = string; |
| } |
| |
| template <> |
| ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string) |
| { |
| token.stringIs8Bit = 0; |
| token.stringToken16 = string; |
| } |
| |
// Selects which characters isSafeStringCharacter() lets the string fast path skip over.
enum class SafeStringCharacterSet {
    Strict,
    NonStrict
};
| |
| template <SafeStringCharacterSet set> |
| static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator) |
| { |
| return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict); |
| } |
| |
| template <SafeStringCharacterSet set> |
| static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator) |
| { |
| return (c >= ' ' && (set == SafeStringCharacterSet::Strict || isLatin1(c)) && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict); |
| } |
| |
| template <typename CharType> |
| ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token, CharType terminator) |
| { |
| ++m_ptr; |
| const CharType* runStart = m_ptr; |
| |
| if (m_mode == StrictJSON) { |
| while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator)) |
| ++m_ptr; |
| } else { |
| while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator)) |
| ++m_ptr; |
| } |
| |
| if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) { |
| setParserTokenString<CharType>(token, runStart); |
| token.stringLength = m_ptr - runStart; |
| token.type = TokString; |
| token.end = ++m_ptr; |
| return TokString; |
| } |
| return lexStringSlow(token, runStart, terminator); |
| } |
| |
| template <typename CharType> |
| TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart, CharType terminator) |
| { |
| m_builder.clear(); |
| goto slowPathBegin; |
| do { |
| runStart = m_ptr; |
| if (m_mode == StrictJSON) { |
| while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator)) |
| ++m_ptr; |
| } else { |
| while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator)) |
| ++m_ptr; |
| } |
| |
| if (!m_builder.isEmpty()) |
| m_builder.appendCharacters(runStart, m_ptr - runStart); |
| |
| slowPathBegin: |
| if ((m_mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') { |
| if (m_builder.isEmpty() && runStart < m_ptr) |
| m_builder.appendCharacters(runStart, m_ptr - runStart); |
| ++m_ptr; |
| if (m_ptr >= m_end) { |
| m_lexErrorMessage = "Unterminated string"_s; |
| return TokError; |
| } |
| switch (*m_ptr) { |
| case '"': |
| m_builder.append('"'); |
| m_ptr++; |
| break; |
| case '\\': |
| m_builder.append('\\'); |
| m_ptr++; |
| break; |
| case '/': |
| m_builder.append('/'); |
| m_ptr++; |
| break; |
| case 'b': |
| m_builder.append('\b'); |
| m_ptr++; |
| break; |
| case 'f': |
| m_builder.append('\f'); |
| m_ptr++; |
| break; |
| case 'n': |
| m_builder.append('\n'); |
| m_ptr++; |
| break; |
| case 'r': |
| m_builder.append('\r'); |
| m_ptr++; |
| break; |
| case 't': |
| m_builder.append('\t'); |
| m_ptr++; |
| break; |
| |
| case 'u': |
| if ((m_end - m_ptr) < 5) { |
| m_lexErrorMessage = "\\u must be followed by 4 hex digits"_s; |
| return TokError; |
| } // uNNNN == 5 characters |
| for (int i = 1; i < 5; i++) { |
| if (!isASCIIHexDigit(m_ptr[i])) { |
| m_lexErrorMessage = makeString("\"\\", StringView { m_ptr, 5 }, "\" is not a valid unicode escape"); |
| return TokError; |
| } |
| } |
| m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4])); |
| m_ptr += 5; |
| break; |
| |
| default: |
| if (*m_ptr == '\'' && m_mode != StrictJSON) { |
| m_builder.append('\''); |
| m_ptr++; |
| break; |
| } |
| m_lexErrorMessage = makeString("Invalid escape character ", StringView { m_ptr, 1 }); |
| return TokError; |
| } |
| } |
| } while ((m_mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator); |
| |
| if (m_ptr >= m_end || *m_ptr != terminator) { |
| m_lexErrorMessage = "Unterminated string"_s; |
| return TokError; |
| } |
| |
| if (m_builder.isEmpty()) { |
| setParserTokenString<CharType>(token, runStart); |
| token.stringLength = m_ptr - runStart; |
| } else { |
| if (m_builder.is8Bit()) { |
| token.stringIs8Bit = 1; |
| token.stringToken8 = m_builder.characters8(); |
| } else { |
| token.stringIs8Bit = 0; |
| token.stringToken16 = m_builder.characters16(); |
| } |
| token.stringLength = m_builder.length(); |
| } |
| token.type = TokString; |
| token.end = ++m_ptr; |
| return TokString; |
| } |
| |
| template <typename CharType> |
| TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token) |
| { |
| // ES5 and json.org define numbers as |
| // number |
| // int |
| // int frac? exp? |
| // |
| // int |
| // -? 0 |
| // -? digit1-9 digits? |
| // |
| // digits |
| // digit digits? |
| // |
| // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)? |
| |
| if (m_ptr < m_end && *m_ptr == '-') // -? |
| ++m_ptr; |
| |
| // (0 | [1-9][0-9]*) |
| if (m_ptr < m_end && *m_ptr == '0') // 0 |
| ++m_ptr; |
| else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9] |
| ++m_ptr; |
| // [0-9]* |
| while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
| ++m_ptr; |
| } else { |
| m_lexErrorMessage = "Invalid number"_s; |
| return TokError; |
| } |
| |
| // ('.' [0-9]+)? |
| const int NumberOfDigitsForSafeInt32 = 9; // The numbers from -99999999 to 999999999 are always in range of Int32. |
| if (m_ptr < m_end && *m_ptr == '.') { |
| ++m_ptr; |
| // [0-9]+ |
| if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) { |
| m_lexErrorMessage = "Invalid digits after decimal point"_s; |
| return TokError; |
| } |
| |
| ++m_ptr; |
| while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
| ++m_ptr; |
| } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) <= NumberOfDigitsForSafeInt32) { |
| int32_t result = 0; |
| token.type = TokNumber; |
| token.end = m_ptr; |
| const CharType* digit = token.start; |
| bool negative = false; |
| if (*digit == '-') { |
| negative = true; |
| digit++; |
| } |
| |
| ASSERT((m_ptr - digit) <= NumberOfDigitsForSafeInt32); |
| while (digit < m_ptr) |
| result = result * 10 + (*digit++) - '0'; |
| |
| if (!negative) |
| token.numberToken = result; |
| else { |
| if (!result) |
| token.numberToken = -0.0; |
| else |
| token.numberToken = -result; |
| } |
| return TokNumber; |
| } |
| |
| // ([eE][+-]? [0-9]+)? |
| if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE] |
| ++m_ptr; |
| |
| // [-+]? |
| if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+')) |
| ++m_ptr; |
| |
| // [0-9]+ |
| if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) { |
| m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"_s; |
| return TokError; |
| } |
| |
| ++m_ptr; |
| while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
| ++m_ptr; |
| } |
| |
| token.type = TokNumber; |
| token.end = m_ptr; |
| size_t parsedLength; |
| token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength); |
| return TokNumber; |
| } |
| |
| template <typename CharType> |
| JSValue LiteralParser<CharType>::parse(ParserState initialState) |
| { |
| VM& vm = m_globalObject->vm(); |
| auto scope = DECLARE_THROW_SCOPE(vm); |
| ParserState state = initialState; |
| MarkedArgumentBuffer objectStack; |
| JSValue lastValue; |
| Vector<ParserState, 16, UnsafeVectorOverflow> stateStack; |
| Vector<Identifier, 16, UnsafeVectorOverflow> identifierStack; |
| HashSet<JSObject*> visitedUnderscoreProto; |
| while (1) { |
| switch(state) { |
| startParseArray: |
| case StartParseArray: { |
| JSArray* array = constructEmptyArray(m_globalObject, nullptr); |
| RETURN_IF_EXCEPTION(scope, JSValue()); |
| objectStack.appendWithCrashOnOverflow(array); |
| } |
| doParseArrayStartExpression: |
| FALLTHROUGH; |
| case DoParseArrayStartExpression: { |
| TokenType lastToken = m_lexer.currentToken()->type; |
| if (m_lexer.next() == TokRBracket) { |
| if (lastToken == TokComma) { |
| m_parseErrorMessage = "Unexpected comma at the end of array expression"_s; |
| return JSValue(); |
| } |
| m_lexer.next(); |
| lastValue = objectStack.takeLast(); |
| break; |
| } |
| |
| stateStack.append(DoParseArrayEndExpression); |
| goto startParseExpression; |
| } |
| case DoParseArrayEndExpression: { |
| JSArray* array = asArray(objectStack.last()); |
| array->putDirectIndex(m_globalObject, array->length(), lastValue); |
| RETURN_IF_EXCEPTION(scope, JSValue()); |
| |
| if (m_lexer.currentToken()->type == TokComma) |
| goto doParseArrayStartExpression; |
| |
| if (m_lexer.currentToken()->type != TokRBracket) { |
| m_parseErrorMessage = "Expected ']'"_s; |
| return JSValue(); |
| } |
| |
| m_lexer.next(); |
| lastValue = objectStack.takeLast(); |
| break; |
| } |
| startParseObject: |
| case StartParseObject: { |
| JSObject* object = constructEmptyObject(m_globalObject); |
| objectStack.appendWithCrashOnOverflow(object); |
| |
| TokenType type = m_lexer.next(); |
| if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) { |
| typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken(); |
| if (identifierToken->stringIs8Bit) |
| identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength)); |
| else |
| identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength)); |
| |
| // Check for colon |
| if (m_lexer.next() != TokColon) { |
| m_parseErrorMessage = "Expected ':' before value in object property definition"_s; |
| return JSValue(); |
| } |
| |
| m_lexer.next(); |
| stateStack.append(DoParseObjectEndExpression); |
| goto startParseExpression; |
| } |
| if (type != TokRBrace) { |
| m_parseErrorMessage = "Expected '}'"_s; |
| return JSValue(); |
| } |
| m_lexer.next(); |
| lastValue = objectStack.takeLast(); |
| break; |
| } |
| doParseObjectStartExpression: |
| case DoParseObjectStartExpression: { |
| TokenType type = m_lexer.next(); |
| if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) { |
| m_parseErrorMessage = "Property name must be a string literal"_s; |
| return JSValue(); |
| } |
| typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken(); |
| if (identifierToken->stringIs8Bit) |
| identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength)); |
| else |
| identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength)); |
| |
| // Check for colon |
| if (m_lexer.next() != TokColon) { |
| m_parseErrorMessage = "Expected ':'"_s; |
| return JSValue(); |
| } |
| |
| m_lexer.next(); |
| stateStack.append(DoParseObjectEndExpression); |
| goto startParseExpression; |
| } |
| case DoParseObjectEndExpression: |
| { |
| JSObject* object = asObject(objectStack.last()); |
| Identifier ident = identifierStack.takeLast(); |
| if (m_mode != StrictJSON && ident == vm.propertyNames->underscoreProto) { |
| if (!visitedUnderscoreProto.add(object).isNewEntry) { |
| m_parseErrorMessage = "Attempted to redefine __proto__ property"_s; |
| return JSValue(); |
| } |
| PutPropertySlot slot(object, m_nullOrCodeBlock ? m_nullOrCodeBlock->ownerExecutable()->isInStrictContext() : false); |
| objectStack.last().put(m_globalObject, ident, lastValue, slot); |
| } else { |
| if (Optional<uint32_t> index = parseIndex(ident)) |
| object->putDirectIndex(m_globalObject, index.value(), lastValue); |
| else |
| object->putDirect(vm, ident, lastValue); |
| } |
| RETURN_IF_EXCEPTION(scope, JSValue()); |
| if (m_lexer.currentToken()->type == TokComma) |
| goto doParseObjectStartExpression; |
| if (m_lexer.currentToken()->type != TokRBrace) { |
| m_parseErrorMessage = "Expected '}'"_s; |
| return JSValue(); |
| } |
| m_lexer.next(); |
| lastValue = objectStack.takeLast(); |
| break; |
| } |
| startParseExpression: |
| case StartParseExpression: { |
| switch (m_lexer.currentToken()->type) { |
| case TokLBracket: |
| goto startParseArray; |
| case TokLBrace: |
| goto startParseObject; |
| case TokString: { |
| typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken(); |
| if (stringToken->stringIs8Bit) |
| lastValue = jsString(vm, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string()); |
| else |
| lastValue = jsString(vm, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string()); |
| m_lexer.next(); |
| break; |
| } |
| case TokNumber: { |
| typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken(); |
| lastValue = jsNumber(numberToken->numberToken); |
| m_lexer.next(); |
| break; |
| } |
| case TokNull: |
| m_lexer.next(); |
| lastValue = jsNull(); |
| break; |
| |
| case TokTrue: |
| m_lexer.next(); |
| lastValue = jsBoolean(true); |
| break; |
| |
| case TokFalse: |
| m_lexer.next(); |
| lastValue = jsBoolean(false); |
| break; |
| case TokRBracket: |
| m_parseErrorMessage = "Unexpected token ']'"_s; |
| return JSValue(); |
| case TokRBrace: |
| m_parseErrorMessage = "Unexpected token '}'"_s; |
| return JSValue(); |
| case TokIdentifier: { |
| typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken(); |
| if (token->stringIs8Bit) |
| m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken8, token->stringLength }, '"'); |
| else |
| m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken16, token->stringLength }, '"'); |
| return JSValue(); |
| } |
| case TokColon: |
| m_parseErrorMessage = "Unexpected token ':'"_s; |
| return JSValue(); |
| case TokLParen: |
| m_parseErrorMessage = "Unexpected token '('"_s; |
| return JSValue(); |
| case TokRParen: |
| m_parseErrorMessage = "Unexpected token ')'"_s; |
| return JSValue(); |
| case TokComma: |
| m_parseErrorMessage = "Unexpected token ','"_s; |
| return JSValue(); |
| case TokDot: |
| m_parseErrorMessage = "Unexpected token '.'"_s; |
| return JSValue(); |
| case TokAssign: |
| m_parseErrorMessage = "Unexpected token '='"_s; |
| return JSValue(); |
| case TokSemi: |
| m_parseErrorMessage = "Unexpected token ';'"_s; |
| return JSValue(); |
| case TokEnd: |
| m_parseErrorMessage = "Unexpected EOF"_s; |
| return JSValue(); |
| case TokError: |
| default: |
| // Error |
| m_parseErrorMessage = "Could not parse value expression"_s; |
| return JSValue(); |
| } |
| break; |
| } |
| case StartParseStatement: { |
| switch (m_lexer.currentToken()->type) { |
| case TokLBracket: |
| case TokNumber: |
| case TokString: |
| goto startParseExpression; |
| |
| case TokLParen: { |
| m_lexer.next(); |
| stateStack.append(StartParseStatementEndStatement); |
| goto startParseExpression; |
| } |
| case TokRBracket: |
| m_parseErrorMessage = "Unexpected token ']'"_s; |
| return JSValue(); |
| case TokLBrace: |
| m_parseErrorMessage = "Unexpected token '{'"_s; |
| return JSValue(); |
| case TokRBrace: |
| m_parseErrorMessage = "Unexpected token '}'"_s; |
| return JSValue(); |
| case TokIdentifier: |
| m_parseErrorMessage = "Unexpected identifier"_s; |
| return JSValue(); |
| case TokColon: |
| m_parseErrorMessage = "Unexpected token ':'"_s; |
| return JSValue(); |
| case TokRParen: |
| m_parseErrorMessage = "Unexpected token ')'"_s; |
| return JSValue(); |
| case TokComma: |
| m_parseErrorMessage = "Unexpected token ','"_s; |
| return JSValue(); |
| case TokTrue: |
| m_parseErrorMessage = "Unexpected token 'true'"_s; |
| return JSValue(); |
| case TokFalse: |
| m_parseErrorMessage = "Unexpected token 'false'"_s; |
| return JSValue(); |
| case TokNull: |
| m_parseErrorMessage = "Unexpected token 'null'"_s; |
| return JSValue(); |
| case TokEnd: |
| m_parseErrorMessage = "Unexpected EOF"_s; |
| return JSValue(); |
| case TokDot: |
| m_parseErrorMessage = "Unexpected token '.'"_s; |
| return JSValue(); |
| case TokAssign: |
| m_parseErrorMessage = "Unexpected token '='"_s; |
| return JSValue(); |
| case TokSemi: |
| m_parseErrorMessage = "Unexpected token ';'"_s; |
| return JSValue(); |
| case TokError: |
| default: |
| m_parseErrorMessage = "Could not parse statement"_s; |
| return JSValue(); |
| } |
| } |
| case StartParseStatementEndStatement: { |
| ASSERT(stateStack.isEmpty()); |
| if (m_lexer.currentToken()->type != TokRParen) |
| return JSValue(); |
| if (m_lexer.next() == TokEnd) |
| return lastValue; |
| m_parseErrorMessage = "Unexpected content at end of JSON literal"_s; |
| return JSValue(); |
| } |
| default: |
| RELEASE_ASSERT_NOT_REACHED(); |
| } |
| if (stateStack.isEmpty()) |
| return lastValue; |
| state = stateStack.takeLast(); |
| continue; |
| } |
| } |
| |
| // Explicitly instantiate the two flavors of LiteralParser we need (LChar and UChar). |
| // The template's member definitions live in this file, so these instantiations are what |
| // let us avoid putting most of this file in LiteralParser.h. |
| template class LiteralParser<LChar>; |
| template class LiteralParser<UChar>; |
| |
| } |