// blob: 50f1eca33b00400a68d6825d8073d68d1abaf58f [file] [log] [blame]
/*
* Copyright (C) 2009-2019 Apple Inc. All rights reserved.
* Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "LiteralParser.h"
#include "ButterflyInlines.h"
#include "CodeBlock.h"
#include "JSArray.h"
#include "JSString.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include "JSCInlines.h"
#include "StrongInlines.h"
#include <limits>
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringConcatenate.h>
namespace JSC {
// Returns true when |c| is one of the four characters that the JSON grammar
// (RFC 4627, section 2: http://www.ietf.org/rfc/rfc4627.txt) treats as
// insignificant white space: space, horizontal tab, line feed, carriage return.
template <typename CharType>
static ALWAYS_INLINE bool isJSONWhiteSpace(const CharType& c)
{
    switch (c) {
    case ' ':
    case 0x9:
    case 0xA:
    case 0xD:
        return true;
    default:
        return false;
    }
}
// Tries to interpret the whole input as a sequence of ';'-separated JSONP statements.
//
// Each statement is an access path followed by a JSON value:
//   - an optional leading `var`, then an identifier (rejected if it is a lexer keyword);
//   - any number of `.name` or `[index]` components (not allowed after `var`);
//   - then either `= <value>` or, for the call form, `( <value> )`.
// The call form is only accepted when the last path component is a dot/name entry and
// |needsFullSourceInfo| is false.
//
// For every statement a JSONPData entry (path + parsed value) is appended to |results|.
// Note that the entry is appended before its value is parsed, so |results| may hold a
// partially filled trailing entry when this function returns false.
//
// Returns true only if every statement parsed and the lexer ended on TokEnd; returns
// false on any syntax mismatch or when parsing the value left an exception pending.
template <typename CharType>
bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
{
    VM& vm = m_globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    // Unguarded next to start off the lexer
    if (m_lexer.next() != TokIdentifier)
        return false;
    do {
        Vector<JSONPPathEntry> path;
        Identifier name = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
        JSONPPathEntry entry;
        if (name == vm.propertyNames->varKeyword) {
            if (m_lexer.next() != TokIdentifier)
                return false;
            entry.m_type = JSONPPathEntryTypeDeclareVar;
            entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        } else {
            entry.m_type = JSONPPathEntryTypeDot;
            // The current token is still the one |name| was built from, so reuse it
            // instead of interning the same characters a second time.
            entry.m_pathEntryName = name;
            path.append(entry);
        }
        if (isLexerKeyword(entry.m_pathEntryName))
            return false;
        TokenType tokenType = m_lexer.next();
        if (entry.m_type == JSONPPathEntryTypeDeclareVar && tokenType != TokAssign)
            return false;
        while (tokenType != TokAssign) {
            switch (tokenType) {
            case TokLBracket: {
                entry.m_type = JSONPPathEntryTypeLookup;
                if (m_lexer.next() != TokNumber)
                    return false;
                double doubleIndex = m_lexer.currentToken()->numberToken;
                // Validate the range before converting: a double -> int conversion is
                // undefined behavior when the truncated value is not representable in
                // int (e.g. `a[1e100]`). The negated comparison also rejects NaN.
                if (!(doubleIndex >= 0 && doubleIndex <= std::numeric_limits<int>::max()))
                    return false;
                int index = static_cast<int>(doubleIndex);
                // Reject non-integral indices such as `a[1.5]`.
                if (index != doubleIndex)
                    return false;
                entry.m_pathIndex = index;
                if (m_lexer.next() != TokRBracket)
                    return false;
                break;
            }
            case TokDot: {
                entry.m_type = JSONPPathEntryTypeDot;
                if (m_lexer.next() != TokIdentifier)
                    return false;
                entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
                break;
            }
            case TokLParen: {
                if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
                    return false;
                // Turn the trailing name into the callee of the JSONP call.
                path.last().m_type = JSONPPathEntryTypeCall;
                entry = path.last();
                goto startJSON;
            }
            default:
                return false;
            }
            path.append(entry);
            tokenType = m_lexer.next();
        }
    startJSON:
        // Step past the '=' or '(' so the value parser starts on the first value token.
        m_lexer.next();
        results.append(JSONPData());
        JSValue startParseExpressionValue = parse(StartParseExpression);
        RETURN_IF_EXCEPTION(scope, false);
        results.last().m_value.set(vm, startParseExpressionValue);
        if (!results.last().m_value)
            return false;
        results.last().m_path.swap(path);
        if (entry.m_type == JSONPPathEntryTypeCall) {
            if (m_lexer.currentToken()->type != TokRParen)
                return false;
            m_lexer.next();
        }
        if (m_lexer.currentToken()->type != TokSemi)
            break;
        m_lexer.next();
    } while (m_lexer.currentToken()->type == TokIdentifier);
    return m_lexer.currentToken()->type == TokEnd;
}
// Builds an Identifier from an 8-bit character run, going through two small caches that
// are both indexed by the first character (only when it is below MaximumCachableCharacter):
//   - m_shortIdentifiers remembers single-character identifiers;
//   - m_recentIdentifiers remembers the last longer identifier seen for that first character.
// An empty run yields the VM's shared empty identifier.
template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length)
{
    VM& vm = m_globalObject->vm();
    if (!length)
        return vm.propertyNames->emptyIdentifier;

    const LChar firstCharacter = characters[0];
    if (firstCharacter >= MaximumCachableCharacter)
        return Identifier::fromString(vm, characters, length);

    if (length == 1) {
        auto& shortSlot = m_shortIdentifiers[firstCharacter];
        if (shortSlot.isNull())
            shortSlot = Identifier::fromString(vm, characters, length);
        return shortSlot;
    }

    // Refill the slot unless it already holds exactly this identifier.
    auto& recentSlot = m_recentIdentifiers[firstCharacter];
    if (recentSlot.isNull() || !Identifier::equal(recentSlot.impl(), characters, length))
        recentSlot = Identifier::fromString(vm, characters, length);
    return recentSlot;
}
// 16-bit counterpart of makeIdentifier(const LChar*, size_t): builds an Identifier from a
// UChar run, consulting the per-first-character caches (m_shortIdentifiers for one-character
// names, m_recentIdentifiers for longer ones) when the first character is below
// MaximumCachableCharacter. An empty run yields the VM's shared empty identifier.
template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length)
{
    VM& vm = m_globalObject->vm();
    if (!length)
        return vm.propertyNames->emptyIdentifier;

    const UChar firstCharacter = characters[0];
    if (firstCharacter >= MaximumCachableCharacter)
        return Identifier::fromString(vm, characters, length);

    if (length == 1) {
        auto& shortSlot = m_shortIdentifiers[firstCharacter];
        if (shortSlot.isNull())
            shortSlot = Identifier::fromString(vm, characters, length);
        return shortSlot;
    }

    // Refill the slot unless it already holds exactly this identifier.
    auto& recentSlot = m_recentIdentifiers[firstCharacter];
    if (recentSlot.isNull() || !Identifier::equal(recentSlot.impl(), characters, length))
        recentSlot = Identifier::fromString(vm, characters, length);
    return recentSlot;
}
// Token classification for each of the 256 Latin-1 code points, indexed by character value.
// Lexer::lex() consults this table for the first character of a token: TokString, TokIdentifier
// and TokNumber select the matching sub-lexer, punctuation entries are returned directly as
// single-character tokens, and TokError marks characters that cannot start a token.
// White space characters are listed as TokError as well; lex() skips them before the lookup.
static constexpr const TokenType TokenTypesOfLatin1Characters[256] = {
/* 0 - Null */ TokError,
/* 1 - Start of Heading */ TokError,
/* 2 - Start of Text */ TokError,
/* 3 - End of Text */ TokError,
/* 4 - End of Transm. */ TokError,
/* 5 - Enquiry */ TokError,
/* 6 - Acknowledgment */ TokError,
/* 7 - Bell */ TokError,
/* 8 - Back Space */ TokError,
/* 9 - Horizontal Tab */ TokError,
/* 10 - Line Feed */ TokError,
/* 11 - Vertical Tab */ TokError,
/* 12 - Form Feed */ TokError,
/* 13 - Carriage Return */ TokError,
/* 14 - Shift Out */ TokError,
/* 15 - Shift In */ TokError,
/* 16 - Data Line Escape */ TokError,
/* 17 - Device Control 1 */ TokError,
/* 18 - Device Control 2 */ TokError,
/* 19 - Device Control 3 */ TokError,
/* 20 - Device Control 4 */ TokError,
/* 21 - Negative Ack. */ TokError,
/* 22 - Synchronous Idle */ TokError,
/* 23 - End of Transmit */ TokError,
/* 24 - Cancel */ TokError,
/* 25 - End of Medium */ TokError,
/* 26 - Substitute */ TokError,
/* 27 - Escape */ TokError,
/* 28 - File Separator */ TokError,
/* 29 - Group Separator */ TokError,
/* 30 - Record Separator */ TokError,
/* 31 - Unit Separator */ TokError,
/* 32 - Space */ TokError,
/* 33 - ! */ TokError,
/* 34 - " */ TokString,
/* 35 - # */ TokError,
/* 36 - $ */ TokIdentifier,
/* 37 - % */ TokError,
/* 38 - & */ TokError,
/* 39 - ' */ TokString,
/* 40 - ( */ TokLParen,
/* 41 - ) */ TokRParen,
/* 42 - * */ TokError,
/* 43 - + */ TokError,
/* 44 - , */ TokComma,
/* 45 - - */ TokNumber,
/* 46 - . */ TokDot,
/* 47 - / */ TokError,
/* 48 - 0 */ TokNumber,
/* 49 - 1 */ TokNumber,
/* 50 - 2 */ TokNumber,
/* 51 - 3 */ TokNumber,
/* 52 - 4 */ TokNumber,
/* 53 - 5 */ TokNumber,
/* 54 - 6 */ TokNumber,
/* 55 - 7 */ TokNumber,
/* 56 - 8 */ TokNumber,
/* 57 - 9 */ TokNumber,
/* 58 - : */ TokColon,
/* 59 - ; */ TokSemi,
/* 60 - < */ TokError,
/* 61 - = */ TokAssign,
/* 62 - > */ TokError,
/* 63 - ? */ TokError,
/* 64 - @ */ TokError,
/* 65 - A */ TokIdentifier,
/* 66 - B */ TokIdentifier,
/* 67 - C */ TokIdentifier,
/* 68 - D */ TokIdentifier,
/* 69 - E */ TokIdentifier,
/* 70 - F */ TokIdentifier,
/* 71 - G */ TokIdentifier,
/* 72 - H */ TokIdentifier,
/* 73 - I */ TokIdentifier,
/* 74 - J */ TokIdentifier,
/* 75 - K */ TokIdentifier,
/* 76 - L */ TokIdentifier,
/* 77 - M */ TokIdentifier,
/* 78 - N */ TokIdentifier,
/* 79 - O */ TokIdentifier,
/* 80 - P */ TokIdentifier,
/* 81 - Q */ TokIdentifier,
/* 82 - R */ TokIdentifier,
/* 83 - S */ TokIdentifier,
/* 84 - T */ TokIdentifier,
/* 85 - U */ TokIdentifier,
/* 86 - V */ TokIdentifier,
/* 87 - W */ TokIdentifier,
/* 88 - X */ TokIdentifier,
/* 89 - Y */ TokIdentifier,
/* 90 - Z */ TokIdentifier,
/* 91 - [ */ TokLBracket,
/* 92 - \ */ TokError,
/* 93 - ] */ TokRBracket,
/* 94 - ^ */ TokError,
/* 95 - _ */ TokIdentifier,
/* 96 - ` */ TokError,
/* 97 - a */ TokIdentifier,
/* 98 - b */ TokIdentifier,
/* 99 - c */ TokIdentifier,
/* 100 - d */ TokIdentifier,
/* 101 - e */ TokIdentifier,
/* 102 - f */ TokIdentifier,
/* 103 - g */ TokIdentifier,
/* 104 - h */ TokIdentifier,
/* 105 - i */ TokIdentifier,
/* 106 - j */ TokIdentifier,
/* 107 - k */ TokIdentifier,
/* 108 - l */ TokIdentifier,
/* 109 - m */ TokIdentifier,
/* 110 - n */ TokIdentifier,
/* 111 - o */ TokIdentifier,
/* 112 - p */ TokIdentifier,
/* 113 - q */ TokIdentifier,
/* 114 - r */ TokIdentifier,
/* 115 - s */ TokIdentifier,
/* 116 - t */ TokIdentifier,
/* 117 - u */ TokIdentifier,
/* 118 - v */ TokIdentifier,
/* 119 - w */ TokIdentifier,
/* 120 - x */ TokIdentifier,
/* 121 - y */ TokIdentifier,
/* 122 - z */ TokIdentifier,
/* 123 - { */ TokLBrace,
/* 124 - | */ TokError,
/* 125 - } */ TokRBrace,
/* 126 - ~ */ TokError,
/* 127 - Delete */ TokError,
/* 128 - Cc category */ TokError,
/* 129 - Cc category */ TokError,
/* 130 - Cc category */ TokError,
/* 131 - Cc category */ TokError,
/* 132 - Cc category */ TokError,
/* 133 - Cc category */ TokError,
/* 134 - Cc category */ TokError,
/* 135 - Cc category */ TokError,
/* 136 - Cc category */ TokError,
/* 137 - Cc category */ TokError,
/* 138 - Cc category */ TokError,
/* 139 - Cc category */ TokError,
/* 140 - Cc category */ TokError,
/* 141 - Cc category */ TokError,
/* 142 - Cc category */ TokError,
/* 143 - Cc category */ TokError,
/* 144 - Cc category */ TokError,
/* 145 - Cc category */ TokError,
/* 146 - Cc category */ TokError,
/* 147 - Cc category */ TokError,
/* 148 - Cc category */ TokError,
/* 149 - Cc category */ TokError,
/* 150 - Cc category */ TokError,
/* 151 - Cc category */ TokError,
/* 152 - Cc category */ TokError,
/* 153 - Cc category */ TokError,
/* 154 - Cc category */ TokError,
/* 155 - Cc category */ TokError,
/* 156 - Cc category */ TokError,
/* 157 - Cc category */ TokError,
/* 158 - Cc category */ TokError,
/* 159 - Cc category */ TokError,
/* 160 - Zs category (nbsp) */ TokError,
/* 161 - Po category */ TokError,
/* 162 - Sc category */ TokError,
/* 163 - Sc category */ TokError,
/* 164 - Sc category */ TokError,
/* 165 - Sc category */ TokError,
/* 166 - So category */ TokError,
/* 167 - So category */ TokError,
/* 168 - Sk category */ TokError,
/* 169 - So category */ TokError,
/* 170 - Ll category */ TokError,
/* 171 - Pi category */ TokError,
/* 172 - Sm category */ TokError,
/* 173 - Cf category */ TokError,
/* 174 - So category */ TokError,
/* 175 - Sk category */ TokError,
/* 176 - So category */ TokError,
/* 177 - Sm category */ TokError,
/* 178 - No category */ TokError,
/* 179 - No category */ TokError,
/* 180 - Sk category */ TokError,
/* 181 - Ll category */ TokError,
/* 182 - So category */ TokError,
/* 183 - Po category */ TokError,
/* 184 - Sk category */ TokError,
/* 185 - No category */ TokError,
/* 186 - Ll category */ TokError,
/* 187 - Pf category */ TokError,
/* 188 - No category */ TokError,
/* 189 - No category */ TokError,
/* 190 - No category */ TokError,
/* 191 - Po category */ TokError,
/* 192 - Lu category */ TokError,
/* 193 - Lu category */ TokError,
/* 194 - Lu category */ TokError,
/* 195 - Lu category */ TokError,
/* 196 - Lu category */ TokError,
/* 197 - Lu category */ TokError,
/* 198 - Lu category */ TokError,
/* 199 - Lu category */ TokError,
/* 200 - Lu category */ TokError,
/* 201 - Lu category */ TokError,
/* 202 - Lu category */ TokError,
/* 203 - Lu category */ TokError,
/* 204 - Lu category */ TokError,
/* 205 - Lu category */ TokError,
/* 206 - Lu category */ TokError,
/* 207 - Lu category */ TokError,
/* 208 - Lu category */ TokError,
/* 209 - Lu category */ TokError,
/* 210 - Lu category */ TokError,
/* 211 - Lu category */ TokError,
/* 212 - Lu category */ TokError,
/* 213 - Lu category */ TokError,
/* 214 - Lu category */ TokError,
/* 215 - Sm category */ TokError,
/* 216 - Lu category */ TokError,
/* 217 - Lu category */ TokError,
/* 218 - Lu category */ TokError,
/* 219 - Lu category */ TokError,
/* 220 - Lu category */ TokError,
/* 221 - Lu category */ TokError,
/* 222 - Lu category */ TokError,
/* 223 - Ll category */ TokError,
/* 224 - Ll category */ TokError,
/* 225 - Ll category */ TokError,
/* 226 - Ll category */ TokError,
/* 227 - Ll category */ TokError,
/* 228 - Ll category */ TokError,
/* 229 - Ll category */ TokError,
/* 230 - Ll category */ TokError,
/* 231 - Ll category */ TokError,
/* 232 - Ll category */ TokError,
/* 233 - Ll category */ TokError,
/* 234 - Ll category */ TokError,
/* 235 - Ll category */ TokError,
/* 236 - Ll category */ TokError,
/* 237 - Ll category */ TokError,
/* 238 - Ll category */ TokError,
/* 239 - Ll category */ TokError,
/* 240 - Ll category */ TokError,
/* 241 - Ll category */ TokError,
/* 242 - Ll category */ TokError,
/* 243 - Ll category */ TokError,
/* 244 - Ll category */ TokError,
/* 245 - Ll category */ TokError,
/* 246 - Ll category */ TokError,
/* 247 - Sm category */ TokError,
/* 248 - Ll category */ TokError,
/* 249 - Ll category */ TokError,
/* 250 - Ll category */ TokError,
/* 251 - Ll category */ TokError,
/* 252 - Ll category */ TokError,
/* 253 - Ll category */ TokError,
/* 254 - Ll category */ TokError,
/* 255 - Ll category */ TokError
};
// Lexes the next token starting at m_ptr into |token| and returns its type.
//
// Leading JSON white space is skipped first. At end of input a zero-length TokEnd token is
// produced. Otherwise the first character is classified through TokenTypesOfLatin1Characters:
// strings, identifiers (including the literals true/false/null) and numbers are handed to the
// dedicated sub-lexers, punctuation becomes a one-character token, and anything else
// (including every non-Latin-1 first character) sets m_lexErrorMessage and yields TokError.
template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
{
#if !ASSERT_DISABLED
    m_currentTokenID++;
#endif

    // Skip insignificant white space in front of the token.
    for (; m_ptr < m_end; ++m_ptr) {
        if (!isJSONWhiteSpace(*m_ptr))
            break;
    }

    ASSERT(m_ptr <= m_end);
    if (m_ptr == m_end) {
        token.start = token.end = m_ptr;
        token.type = TokEnd;
        return TokEnd;
    }
    ASSERT(m_ptr < m_end);

    // Until proven otherwise the token is an error that starts here.
    token.start = m_ptr;
    token.type = TokError;
    const CharType first = *m_ptr;

    if (LIKELY(isLatin1(first))) {
        const TokenType kind = TokenTypesOfLatin1Characters[first];

        if (kind == TokString) {
            if (first == '\'' && m_mode == StrictJSON) {
                m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s;
                return TokError;
            }
            return lexString(token, first);
        }

        if (kind == TokIdentifier) {
            // Consumes |length| characters and reports them as the literal |keyword|.
            auto finishKeyword = [&](TokenType keyword, int length) {
                m_ptr += length;
                token.type = keyword;
                token.end = m_ptr;
                return keyword;
            };
            const auto available = m_end - m_ptr;

            if (first == 't') {
                if (available >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e')
                    return finishKeyword(TokTrue, 4);
            } else if (first == 'f') {
                if (available >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e')
                    return finishKeyword(TokFalse, 5);
            } else if (first == 'n') {
                if (available >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l')
                    return finishKeyword(TokNull, 4);
            }
            return lexIdentifier(token);
        }

        if (kind == TokNumber)
            return lexNumber(token);

        if (kind != TokError) {
            // Everything that is left is single-character punctuation.
            ASSERT(kind == TokLBracket
                || kind == TokRBracket
                || kind == TokLBrace
                || kind == TokRBrace
                || kind == TokColon
                || kind == TokLParen
                || kind == TokRParen
                || kind == TokComma
                || kind == TokDot
                || kind == TokAssign
                || kind == TokSemi);
            token.type = kind;
            token.end = ++m_ptr;
            return kind;
        }
    }

    m_lexErrorMessage = makeString("Unrecognized token '", StringView { m_ptr, 1 }, '\'');
    return TokError;
}
// 8-bit identifier lexer: consumes the run of ASCII alphanumerics, '_' and '$' at m_ptr and
// records it in |token| as an 8-bit string spanning from token.start to the new m_ptr.
template <>
ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token)
{
    for (; m_ptr < m_end; ++m_ptr) {
        const LChar current = *m_ptr;
        if (!isASCIIAlphanumeric(current) && current != '_' && current != '$')
            break;
    }

    token.type = TokIdentifier;
    token.end = m_ptr;
    token.stringIs8Bit = 1;
    token.stringToken8 = token.start;
    token.stringLength = m_ptr - token.start;
    return TokIdentifier;
}
// 16-bit identifier lexer: consumes the run of ASCII alphanumerics, '_', '$', U+200C and
// U+200D at m_ptr and records it in |token| as a 16-bit string spanning from token.start
// to the new m_ptr.
template <>
ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token)
{
    for (; m_ptr < m_end; ++m_ptr) {
        const UChar current = *m_ptr;
        const bool isIdentifierPart = isASCIIAlphanumeric(current) || current == '_' || current == '$' || current == 0x200C || current == 0x200D;
        if (!isIdentifierPart)
            break;
    }

    token.type = TokIdentifier;
    token.end = m_ptr;
    token.stringIs8Bit = 0;
    token.stringToken16 = token.start;
    token.stringLength = m_ptr - token.start;
    return TokIdentifier;
}
// Advances the lexer by one token: lexes into m_currentToken and returns the new token's type.
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::next()
{
    const TokenType lexedType = lex(m_currentToken);
    // lex() must have stored the same type it returned.
    ASSERT(lexedType == m_currentToken.type);
    return lexedType;
}
// Points |token| at the 8-bit character run |string| and flags the token's string as 8-bit.
// The length is not touched here.
template <>
ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string)
{
    token.stringToken8 = string;
    token.stringIs8Bit = 1;
}
// Points |token| at the 16-bit character run |string| and flags the token's string as 16-bit.
// The length is not touched here.
template <>
ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string)
{
    token.stringToken16 = string;
    token.stringIs8Bit = 0;
}
// Selects which characters isSafeStringCharacter() lets the string lexer's fast scan consume:
// Strict is used when the lexer is in StrictJSON mode, NonStrict in every other mode.
enum class SafeStringCharacterSet { Strict, NonStrict };
// Returns true when the 8-bit character |c| can be taken verbatim by the string fast scan:
// it is not a control character, a backslash or the closing |terminator|. A raw tab is
// additionally accepted in the NonStrict set.
template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator)
{
    // Tab is below ' ', so it is decided purely by the character set.
    if (c == '\t')
        return set != SafeStringCharacterSet::Strict;
    if (c < ' ')
        return false;
    return c != '\\' && c != terminator;
}
// Returns true when the 16-bit character |c| can be taken verbatim by the string fast scan:
// it is not a control character, a backslash or the closing |terminator|. In the NonStrict
// set the character must additionally be Latin-1, and a raw tab is accepted.
template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator)
{
    // Tab is below ' ', so it is decided purely by the character set.
    if (c == '\t')
        return set != SafeStringCharacterSet::Strict;
    if (c < ' ')
        return false;
    if (set != SafeStringCharacterSet::Strict && !isLatin1(c))
        return false;
    return c != '\\' && c != terminator;
}
// Fast path of string lexing. m_ptr is on the opening quote, whose character is |terminator|.
// Scans forward over "safe" characters; if the scan stops exactly on the closing quote the
// token refers directly to the source characters between the quotes. Any other stop
// (escape, unsafe character, end of input) is delegated to lexStringSlow().
template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token, CharType terminator)
{
    // Step over the opening quote and remember where the contents begin.
    const CharType* const contentStart = ++m_ptr;

    if (m_mode != StrictJSON) {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
            ++m_ptr;
    } else {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
            ++m_ptr;
    }

    if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) {
        token.type = TokString;
        setParserTokenString<CharType>(token, contentStart);
        token.stringLength = m_ptr - contentStart;
        // Consume the closing quote.
        token.end = ++m_ptr;
        return TokString;
    }

    return lexStringSlow(token, contentStart, terminator);
}
// Slow path of string lexing, entered from lexString() when its fast scan did not stop on the
// closing quote. |runStart| is the first character after the opening quote, m_ptr is where the
// fast scan stopped, and |terminator| is the quote character that opened the string.
//
// Escape sequences are decoded into m_builder (only when m_mode is not NonStrictJSON). On
// success the token is TokString and refers either to the source characters (when nothing was
// put into m_builder) or to m_builder's characters. On failure m_lexErrorMessage is set and
// TokError is returned.
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart, CharType terminator)
{
m_builder.clear();
// lexString() already scanned the first run of plain characters, so enter the loop after the scan.
goto slowPathBegin;
do {
// Scan the next run of characters that need no decoding.
runStart = m_ptr;
if (m_mode == StrictJSON) {
while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
++m_ptr;
} else {
while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
++m_ptr;
}
// Once the builder is in use, every plain run has to be copied into it as well.
if (!m_builder.isEmpty())
m_builder.appendCharacters(runStart, m_ptr - runStart);
slowPathBegin:
if ((m_mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
// First escape seen: copy the plain run that precedes it before appending the decoded character.
if (m_builder.isEmpty() && runStart < m_ptr)
m_builder.appendCharacters(runStart, m_ptr - runStart);
++m_ptr;
if (m_ptr >= m_end) {
m_lexErrorMessage = "Unterminated string"_s;
return TokError;
}
switch (*m_ptr) {
case '"':
m_builder.append('"');
m_ptr++;
break;
case '\\':
m_builder.append('\\');
m_ptr++;
break;
case '/':
m_builder.append('/');
m_ptr++;
break;
case 'b':
m_builder.append('\b');
m_ptr++;
break;
case 'f':
m_builder.append('\f');
m_ptr++;
break;
case 'n':
m_builder.append('\n');
m_ptr++;
break;
case 'r':
m_builder.append('\r');
m_ptr++;
break;
case 't':
m_builder.append('\t');
m_ptr++;
break;
case 'u':
if ((m_end - m_ptr) < 5) {
m_lexErrorMessage = "\\u must be followed by 4 hex digits"_s;
return TokError;
} // uNNNN == 5 characters
for (int i = 1; i < 5; i++) {
if (!isASCIIHexDigit(m_ptr[i])) {
m_lexErrorMessage = makeString("\"\\", StringView { m_ptr, 5 }, "\" is not a valid unicode escape");
return TokError;
}
}
m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
m_ptr += 5;
break;
default:
// An escaped single quote is only accepted outside StrictJSON mode.
if (*m_ptr == '\'' && m_mode != StrictJSON) {
m_builder.append('\'');
m_ptr++;
break;
}
m_lexErrorMessage = makeString("Invalid escape character ", StringView { m_ptr, 1 });
return TokError;
}
}
// Keep going while progress was made in this pass and neither the end of input nor the
// closing quote has been reached. In NonStrictJSON mode the loop body never repeats.
} while ((m_mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);
if (m_ptr >= m_end || *m_ptr != terminator) {
m_lexErrorMessage = "Unterminated string"_s;
return TokError;
}
if (m_builder.isEmpty()) {
// Nothing was decoded: the token can refer to the source characters directly.
setParserTokenString<CharType>(token, runStart);
token.stringLength = m_ptr - runStart;
} else {
// The decoded contents live in m_builder, which may be 8-bit or 16-bit.
if (m_builder.is8Bit()) {
token.stringIs8Bit = 1;
token.stringToken8 = m_builder.characters8();
} else {
token.stringIs8Bit = 0;
token.stringToken16 = m_builder.characters16();
}
token.stringLength = m_builder.length();
}
token.type = TokString;
// Consume the closing quote.
token.end = ++m_ptr;
return TokString;
}
// Lexes a JSON number starting at m_ptr (token.start already points at it).
//
// ES5 and json.org define numbers as
//     -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
//
// Short integers without fraction or exponent that are followed by more input are converted
// by hand; everything else is converted with parseDouble(). On a malformed number
// m_lexErrorMessage is set and TokError is returned.
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token)
{
    // Consumes [0-9]* at m_ptr.
    auto skipDigits = [this] {
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    };

    // -?
    if (m_ptr < m_end && *m_ptr == '-')
        ++m_ptr;

    // (0 | [1-9][0-9]*)
    if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
        m_lexErrorMessage = "Invalid number"_s;
        return TokError;
    }
    const bool startsWithZero = *m_ptr == '0';
    ++m_ptr;
    // A leading zero stands alone; any other leading digit may be followed by more digits.
    if (!startsWithZero)
        skipDigits();

    // The length below includes an optional '-' sign, so the numbers from -99999999 to
    // 999999999 qualify, and those are always in range of Int32.
    const int NumberOfDigitsForSafeInt32 = 9;

    // ('.' [0-9]+)?
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Invalid digits after decimal point"_s;
            return TokError;
        }
        skipDigits();
    } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) <= NumberOfDigitsForSafeInt32) {
        // Plain short integer: accumulate the digits directly.
        token.type = TokNumber;
        token.end = m_ptr;

        const CharType* cursor = token.start;
        const bool isNegative = *cursor == '-';
        if (isNegative)
            ++cursor;
        ASSERT((m_ptr - cursor) <= NumberOfDigitsForSafeInt32);

        int32_t magnitude = 0;
        for (; cursor < m_ptr; ++cursor)
            magnitude = magnitude * 10 + *cursor - '0';

        if (!isNegative)
            token.numberToken = magnitude;
        else if (!magnitude)
            token.numberToken = -0.0; // Keep the sign of "-0".
        else
            token.numberToken = -magnitude;
        return TokNumber;
    }

    // ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) {
        ++m_ptr;
        // [-+]?
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;
        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"_s;
            return TokError;
        }
        skipDigits();
    }

    token.type = TokNumber;
    token.end = m_ptr;
    size_t parsedLength;
    token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength);
    return TokNumber;
}
// Parses a value (or, for StartParseStatement, a statement) beginning at the lexer's current
// token and returns it, starting in |initialState|.
//
// The parser is iterative: instead of recursing into nested arrays and objects it keeps
// explicit stacks. |stateStack| holds the state to resume once the value being parsed is
// complete, |objectStack| holds the arrays/objects currently under construction, and
// |identifierStack| holds the property names whose values are still being parsed. The goto
// labels placed in front of the `case` labels let one state jump straight into another
// without going back through the dispatch loop.
//
// Returns an empty JSValue on failure; most failure paths also set m_parseErrorMessage.
// An empty JSValue is returned as well when an exception is pending after a VM call.
template <typename CharType>
JSValue LiteralParser<CharType>::parse(ParserState initialState)
{
VM& vm = m_globalObject->vm();
auto scope = DECLARE_THROW_SCOPE(vm);
ParserState state = initialState;
MarkedArgumentBuffer objectStack;
JSValue lastValue;
Vector<ParserState, 16, UnsafeVectorOverflow> stateStack;
Vector<Identifier, 16, UnsafeVectorOverflow> identifierStack;
// Objects that already received a __proto__ property through this parser (non-strict modes only).
HashSet<JSObject*> visitedUnderscoreProto;
while (1) {
switch(state) {
// '[' was seen: create the array, then parse its first element (or the closing ']').
startParseArray:
case StartParseArray: {
JSArray* array = constructEmptyArray(m_globalObject, 0);
RETURN_IF_EXCEPTION(scope, JSValue());
objectStack.appendWithCrashOnOverflow(array);
}
doParseArrayStartExpression:
FALLTHROUGH;
// Current token is '[' or ','; the next token either closes the array or starts an element.
case DoParseArrayStartExpression: {
TokenType lastToken = m_lexer.currentToken()->type;
if (m_lexer.next() == TokRBracket) {
if (lastToken == TokComma) {
m_parseErrorMessage = "Unexpected comma at the end of array expression"_s;
return JSValue();
}
m_lexer.next();
lastValue = objectStack.takeLast();
break;
}
stateStack.append(DoParseArrayEndExpression);
goto startParseExpression;
}
// An element was parsed into lastValue: append it, then expect ',' or ']'.
case DoParseArrayEndExpression: {
JSArray* array = asArray(objectStack.last());
array->putDirectIndex(m_globalObject, array->length(), lastValue);
RETURN_IF_EXCEPTION(scope, JSValue());
if (m_lexer.currentToken()->type == TokComma)
goto doParseArrayStartExpression;
if (m_lexer.currentToken()->type != TokRBracket) {
m_parseErrorMessage = "Expected ']'"_s;
return JSValue();
}
m_lexer.next();
lastValue = objectStack.takeLast();
break;
}
// '{' was seen: create the object, then parse the first property (or the closing '}').
startParseObject:
case StartParseObject: {
JSObject* object = constructEmptyObject(m_globalObject);
objectStack.appendWithCrashOnOverflow(object);
TokenType type = m_lexer.next();
// Property names are string literals; outside StrictJSON mode bare identifiers are accepted too.
if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
if (identifierToken->stringIs8Bit)
identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
else
identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));
// Check for colon
if (m_lexer.next() != TokColon) {
m_parseErrorMessage = "Expected ':' before value in object property definition"_s;
return JSValue();
}
m_lexer.next();
stateStack.append(DoParseObjectEndExpression);
goto startParseExpression;
}
if (type != TokRBrace) {
m_parseErrorMessage = "Expected '}'"_s;
return JSValue();
}
m_lexer.next();
lastValue = objectStack.takeLast();
break;
}
// Current token is ',' inside an object: a property name must follow.
doParseObjectStartExpression:
case DoParseObjectStartExpression: {
TokenType type = m_lexer.next();
if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) {
m_parseErrorMessage = "Property name must be a string literal"_s;
return JSValue();
}
typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
if (identifierToken->stringIs8Bit)
identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
else
identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));
// Check for colon
if (m_lexer.next() != TokColon) {
m_parseErrorMessage = "Expected ':'"_s;
return JSValue();
}
m_lexer.next();
stateStack.append(DoParseObjectEndExpression);
goto startParseExpression;
}
// A property value was parsed into lastValue: store it, then expect ',' or '}'.
case DoParseObjectEndExpression:
{
JSObject* object = asObject(objectStack.last());
Identifier ident = identifierStack.takeLast();
// Outside StrictJSON mode, __proto__ goes through a regular put() and may only be set once per object.
if (m_mode != StrictJSON && ident == vm.propertyNames->underscoreProto) {
if (!visitedUnderscoreProto.add(object).isNewEntry) {
m_parseErrorMessage = "Attempted to redefine __proto__ property"_s;
return JSValue();
}
PutPropertySlot slot(object, m_nullOrCodeBlock ? m_nullOrCodeBlock->isStrictMode() : false);
objectStack.last().put(m_globalObject, ident, lastValue, slot);
} else {
// Names that parse as array indices are stored as indexed properties.
if (Optional<uint32_t> index = parseIndex(ident))
object->putDirectIndex(m_globalObject, index.value(), lastValue);
else
object->putDirect(vm, ident, lastValue);
}
RETURN_IF_EXCEPTION(scope, JSValue());
if (m_lexer.currentToken()->type == TokComma)
goto doParseObjectStartExpression;
if (m_lexer.currentToken()->type != TokRBrace) {
m_parseErrorMessage = "Expected '}'"_s;
return JSValue();
}
m_lexer.next();
lastValue = objectStack.takeLast();
break;
}
// Parse one value starting at the current token; primitives are produced in place.
startParseExpression:
case StartParseExpression: {
switch (m_lexer.currentToken()->type) {
case TokLBracket:
goto startParseArray;
case TokLBrace:
goto startParseObject;
case TokString: {
typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken();
if (stringToken->stringIs8Bit)
lastValue = jsString(vm, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string());
else
lastValue = jsString(vm, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string());
m_lexer.next();
break;
}
case TokNumber: {
typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken();
lastValue = jsNumber(numberToken->numberToken);
m_lexer.next();
break;
}
case TokNull:
m_lexer.next();
lastValue = jsNull();
break;
case TokTrue:
m_lexer.next();
lastValue = jsBoolean(true);
break;
case TokFalse:
m_lexer.next();
lastValue = jsBoolean(false);
break;
case TokRBracket:
m_parseErrorMessage = "Unexpected token ']'"_s;
return JSValue();
case TokRBrace:
m_parseErrorMessage = "Unexpected token '}'"_s;
return JSValue();
case TokIdentifier: {
typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken();
if (token->stringIs8Bit)
m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken8, token->stringLength }, '"');
else
m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken16, token->stringLength }, '"');
return JSValue();
}
case TokColon:
m_parseErrorMessage = "Unexpected token ':'"_s;
return JSValue();
case TokLParen:
m_parseErrorMessage = "Unexpected token '('"_s;
return JSValue();
case TokRParen:
m_parseErrorMessage = "Unexpected token ')'"_s;
return JSValue();
case TokComma:
m_parseErrorMessage = "Unexpected token ','"_s;
return JSValue();
case TokDot:
m_parseErrorMessage = "Unexpected token '.'"_s;
return JSValue();
case TokAssign:
m_parseErrorMessage = "Unexpected token '='"_s;
return JSValue();
case TokSemi:
m_parseErrorMessage = "Unexpected token ';'"_s;
return JSValue();
case TokEnd:
m_parseErrorMessage = "Unexpected EOF"_s;
return JSValue();
case TokError:
default:
// Error
m_parseErrorMessage = "Could not parse value expression"_s;
return JSValue();
}
break;
}
// Statement entry point: only an array, number or string literal, or a parenthesized
// expression, is accepted; every other token is reported as an error.
case StartParseStatement: {
switch (m_lexer.currentToken()->type) {
case TokLBracket:
case TokNumber:
case TokString:
goto startParseExpression;
case TokLParen: {
m_lexer.next();
stateStack.append(StartParseStatementEndStatement);
goto startParseExpression;
}
case TokRBracket:
m_parseErrorMessage = "Unexpected token ']'"_s;
return JSValue();
case TokLBrace:
m_parseErrorMessage = "Unexpected token '{'"_s;
return JSValue();
case TokRBrace:
m_parseErrorMessage = "Unexpected token '}'"_s;
return JSValue();
case TokIdentifier:
m_parseErrorMessage = "Unexpected identifier"_s;
return JSValue();
case TokColon:
m_parseErrorMessage = "Unexpected token ':'"_s;
return JSValue();
case TokRParen:
m_parseErrorMessage = "Unexpected token ')'"_s;
return JSValue();
case TokComma:
m_parseErrorMessage = "Unexpected token ','"_s;
return JSValue();
case TokTrue:
m_parseErrorMessage = "Unexpected token 'true'"_s;
return JSValue();
case TokFalse:
m_parseErrorMessage = "Unexpected token 'false'"_s;
return JSValue();
case TokNull:
m_parseErrorMessage = "Unexpected token 'null'"_s;
return JSValue();
case TokEnd:
m_parseErrorMessage = "Unexpected EOF"_s;
return JSValue();
case TokDot:
m_parseErrorMessage = "Unexpected token '.'"_s;
return JSValue();
case TokAssign:
m_parseErrorMessage = "Unexpected token '='"_s;
return JSValue();
case TokSemi:
m_parseErrorMessage = "Unexpected token ';'"_s;
return JSValue();
case TokError:
default:
m_parseErrorMessage = "Could not parse statement"_s;
return JSValue();
}
}
// The expression inside a parenthesized statement is done: require ')' followed by end of input.
case StartParseStatementEndStatement: {
ASSERT(stateStack.isEmpty());
// NOTE(review): this failure path returns without setting m_parseErrorMessage.
if (m_lexer.currentToken()->type != TokRParen)
return JSValue();
if (m_lexer.next() == TokEnd)
return lastValue;
m_parseErrorMessage = "Unexpected content at end of JSON literal"_s;
return JSValue();
}
default:
RELEASE_ASSERT_NOT_REACHED();
}
// A value was completed: either it is the final result or the enclosing state resumes.
if (stateStack.isEmpty())
return lastValue;
state = stateStack.takeLast();
continue;
}
}
// Explicitly instantiate the two flavors of LiteralParser we need (8-bit LChar and 16-bit UChar
// input) in this translation unit, instead of putting most of this file in LiteralParser.h.
template class LiteralParser<LChar>;
template class LiteralParser<UChar>;
}