// JavaScriptCore/parser/Lexer.cpp - WebKit - Git at Google

 /*
  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
  *
  *  This library is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU Library General Public
  *  License as published by the Free Software Foundation; either
  *  version 2 of the License, or (at your option) any later version.
  *
  *  This library is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  *  Library General Public License for more details.
  *
  *  You should have received a copy of the GNU Library General Public License
  *  along with this library; see the file COPYING.LIB.  If not, write to
  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  *  Boston, MA 02110-1301, USA.
  *
  */

 #include "config.h"
 #include "Lexer.h"

 #include "JSFunction.h"
 #include "JSGlobalObjectFunctions.h"
 #include "NodeInfo.h"
 #include "Nodes.h"
 #include "dtoa.h"
 #include <ctype.h>
 #include <limits.h>
 #include <string.h>
 #include <wtf/Assertions.h>

 using namespace WTF;
 using namespace Unicode;

 // We can't specify the namespace in yacc's C output, so do it here instead.
 using namespace JSC;

 #ifndef KDE_USE_FINAL
 #include "Grammar.h"
 #endif

 #include "Lookup.h"
 #include "Lexer.lut.h"

 // A bridge for yacc from the C world to the C++ world.
 int jscyylex(void* lvalp, void* llocp, void* globalData)
 {
     return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
 }

 namespace JSC {

 // U+FEFF: the code unit setCode() searches for and copyCodeWithoutBOMs() drops.
 static const UChar byteOrderMark = 0xFEFF;

 Lexer::Lexer(JSGlobalData* globalData)
     : m_isReparsing(false)
     , m_globalData(globalData)
     , m_keywordTable(JSC::mainTable)
 {
     // Give both scratch buffers their initial capacity up front.
     m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
 }

 // Calls deleteTable() on the keyword table that the constructor initialized
 // from JSC::mainTable.
 Lexer::~Lexer()
 {
     m_keywordTable.deleteTable();
 }

 // Returns the address of the character currently held in m_current.
 // m_code always sits four UChars past that character: setCode() primes the
 // window with shift4(), and every shiftN() advances m_code by exactly N.
 inline const UChar* Lexer::currentCharacter() const
 {
     return m_code - 4;
 }

 inline int Lexer::currentOffset() const
 {
     return currentCharacter() - m_codeStart;
 }

 ALWAYS_INLINE void Lexer::shift1()
 {
     // Slide the four-character window left by one slot...
     m_current = m_next1;
     m_next1 = m_next2;
     m_next2 = m_next3;

     // ...and refill the last slot, using -1 once the input is exhausted.
     m_next3 = LIKELY(m_code < m_codeEnd) ? m_code[0] : -1;
     ++m_code;
 }

 ALWAYS_INLINE void Lexer::shift2()
 {
     // Advance the window by two characters: the two old lookahead slots move
     // to the front and the two trailing slots are refilled from the input.
     m_current = m_next2;
     m_next1 = m_next3;

     const UChar* const refill = m_code;
     m_code += 2;

     if (UNLIKELY(refill + 1 >= m_codeEnd)) {
         // Fewer than two characters remain; missing ones become -1.
         m_next2 = refill < m_codeEnd ? refill[0] : -1;
         m_next3 = -1;
         return;
     }

     m_next2 = refill[0];
     m_next3 = refill[1];
 }

 ALWAYS_INLINE void Lexer::shift3()
 {
     // Advance the window by three characters: only the last lookahead slot
     // survives; the other three are refilled from the input.
     m_current = m_next3;

     const UChar* const refill = m_code;
     m_code += 3;

     if (UNLIKELY(refill + 2 >= m_codeEnd)) {
         // Fewer than three characters remain; missing ones become -1.
         m_next1 = refill < m_codeEnd ? refill[0] : -1;
         m_next2 = refill + 1 < m_codeEnd ? refill[1] : -1;
         m_next3 = -1;
         return;
     }

     m_next1 = refill[0];
     m_next2 = refill[1];
     m_next3 = refill[2];
 }

 ALWAYS_INLINE void Lexer::shift4()
 {
     // Replace the whole four-character window with the next four input
     // characters; positions past the end of the input read as -1.
     const UChar* const refill = m_code;
     m_code += 4;

     if (UNLIKELY(refill + 3 >= m_codeEnd)) {
         m_current = refill < m_codeEnd ? refill[0] : -1;
         m_next1 = refill + 1 < m_codeEnd ? refill[1] : -1;
         m_next2 = refill + 2 < m_codeEnd ? refill[2] : -1;
         m_next3 = -1;
         return;
     }

     m_current = refill[0];
     m_next1 = refill[1];
     m_next2 = refill[2];
     m_next3 = refill[3];
 }

 // Points the lexer at `source` and primes the four-character lookahead window.
 //
 // Resets the line number, m_delimited, m_lastToken, m_atLineStart and the
 // error flag. The address of `source` is kept in m_source, so the SourceCode
 // object has to stay alive for as long as the lexer refers to it.
 void Lexer::setCode(const SourceCode& source)
 {
     m_lineNumber = source.firstLine();
     m_delimited = false;
     m_lastToken = -1;

     const UChar* data = source.provider()->data();

     m_source = &source;
     m_codeStart = data;
     m_code = data + source.startOffset();
     m_codeEnd = data + source.endOffset();
     m_error = false;
     m_atLineStart = true;

     // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
     // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
     // Only [m_code, m_codeEnd) is ever read by the shift functions, so only that
     // range is searched; a BOM located before startOffset needs no stripped copy.
     if (source.provider()->hasBOMs()) {
         for (const UChar* p = m_code; p < m_codeEnd; ++p) {
             if (UNLIKELY(*p == byteOrderMark)) {
                 copyCodeWithoutBOMs();
                 break;
             }
         }
     }

     // Read the first characters into the 4-character buffer.
     shift4();
     ASSERT(currentOffset() == source.startOffset());
 }

 void Lexer::copyCodeWithoutBOMs()
 {
     // Note: In this case, the character offset data for debugging will be incorrect.
     // If it's important to correctly debug code with extraneous BOMs, then the caller
     // should strip the BOMs when creating the SourceProvider object and do its own
     // mapping of offsets within the stripped text to original text offset.

     m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
     for (const UChar* p = m_code; p < m_codeEnd; ++p) {
         UChar c = *p;
         if (c != byteOrderMark)
             m_codeWithoutBOMs.append(c);
     }
     ptrdiff_t startDelta = m_codeStart - m_code;
     m_code = m_codeWithoutBOMs.data();
     m_codeStart = m_code + startDelta;
     m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
 }

 void Lexer::shiftLineTerminator()
 {
     ASSERT(isLineTerminator(m_current));

     // A '\n' next to a '\r' (in either order) counts as one line break; the sum
     // test recognises both CRLF and LFCR with a single comparison.
     const bool isTwoCharacterBreak = m_current + m_next1 == '\n' + '\r';
     if (isTwoCharacterBreak)
         shift2();
     else
         shift1();

     m_lineNumber += 1;
 }

 // Builds an Identifier from `length` UChars starting at `characters`, appends
 // it to m_identifiers and returns the address of that newly appended element.
 // NOTE(review): lex() hands these pointers to the parser while further
 // identifiers keep being appended, so m_identifiers presumably keeps element
 // addresses stable across append() - confirm against its type in Lexer.h.
 ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
 {
     m_identifiers.append(Identifier(m_globalData, characters, length));
     return &m_identifiers.last();
 }

 inline bool Lexer::lastTokenWasRestrKeyword() const
 {
     // True when the previously returned token was continue, break, return or
     // throw; lex() inserts a ';' when a line break follows one of these.
     switch (m_lastToken) {
         case CONTINUE:
         case BREAK:
         case RETURN:
         case THROW:
             return true;
         default:
             return false;
     }
 }

 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
 {
     // Unicode categories accepted for the first character of an identifier.
     const int identStartCategories = Letter_Uppercase
         | Letter_Lowercase
         | Letter_Titlecase
         | Letter_Modifier
         | Letter_Other;
     return category(c) & identStartCategories;
 }

 static inline bool isIdentStart(int c)
 {
     // Non-ASCII characters take the out-of-line Unicode category check.
     if (!isASCII(c))
         return isNonASCIIIdentStart(c);

     // ASCII: letters plus '$' and '_'.
     return c == '$' || c == '_' || isASCIIAlpha(c);
 }

 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
 {
     // Unicode categories accepted after the first character of an identifier:
     // the letter categories plus marks, decimal digits and connector punctuation.
     const int identPartCategories = Letter_Uppercase
         | Letter_Lowercase
         | Letter_Titlecase
         | Letter_Modifier
         | Letter_Other
         | Mark_NonSpacing
         | Mark_SpacingCombining
         | Number_DecimalDigit
         | Punctuation_Connector;
     return category(c) & identPartCategories;
 }

 static inline bool isIdentPart(int c)
 {
     // Non-ASCII characters take the out-of-line Unicode category check.
     if (!isASCII(c))
         return isNonASCIIIdentPart(c);

     // ASCII: letters, digits, '$' and '_'.
     return c == '$' || c == '_' || isASCIIAlphanumeric(c);
 }

 // Maps the letter following a backslash to the control character it denotes
 // (b, t, n, v, f, r); every other value is returned unchanged.
 static inline int singleEscape(int c)
 {
     static const int escapeLetters[] = { 'b', 't', 'n', 'v', 'f', 'r' };
     static const int escapeValues[] = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D };
     const int escapeCount = 6;

     for (int i = 0; i < escapeCount; ++i) {
         if (c == escapeLetters[i])
             return escapeValues[i];
     }
     return c;
 }

 // Appends c to the 8-bit scratch buffer (used by lex() to collect numeric
 // literal text). c is asserted to lie in 0..0xFF.
 inline void Lexer::record8(int c)
 {
     ASSERT(c >= 0);
     ASSERT(c <= 0xFF);
     m_buffer8.append(static_cast<char>(c));
 }

 // Appends one UTF-16 code unit to the 16-bit scratch buffer.
 inline void Lexer::record16(UChar c)
 {
     m_buffer16.append(c);
 }

 inline void Lexer::record16(int c)
 {
     // Callers pass window values, which are ints; -1 (end of input) and
     // anything above USHRT_MAX must never reach the buffer.
     ASSERT(c >= 0);
     ASSERT(c <= USHRT_MAX);

     const unsigned short codeUnit = static_cast<unsigned short>(c);
     record16(UChar(codeUnit));
 }

 int Lexer::lex(void* p1, void* p2)
 {
     ASSERT(!m_error);
     ASSERT(m_buffer8.isEmpty());
     ASSERT(m_buffer16.isEmpty());

     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
     int token = 0;
     m_terminator = false;

 start:
     while (isWhiteSpace(m_current))
         shift1();

     int startOffset = currentOffset();

     if (m_current == -1) {
         if (!m_terminator && !m_delimited && !m_isReparsing) {
             // automatic semicolon insertion if program incomplete
             token = ';';
             goto doneSemicolon;
         }
         return 0;
     }

     m_delimited = false;
     switch (m_current) {
         case '>':
             if (m_next1 == '>' && m_next2 == '>') {
                 if (m_next3 == '=') {
                     shift4();
                     token = URSHIFTEQUAL;
                     break;
                 }
                 shift3();
                 token = URSHIFT;
                 break;
             }
             if (m_next1 == '>') {
                 if (m_next2 == '=') {
                     shift3();
                     token = RSHIFTEQUAL;
                     break;
                 }
                 shift2();
                 token = RSHIFT;
                 break;
             }
             if (m_next1 == '=') {
                 shift2();
                 token = GE;
                 break;
             }
             shift1();
             token = '>';
             break;
         case '=':
             if (m_next1 == '=') {
                 if (m_next2 == '=') {
                     shift3();
                     token = STREQ;
                     break;
                 }
                 shift2();
                 token = EQEQ;
                 break;
             }
             shift1();
             token = '=';
             break;
         case '!':
             if (m_next1 == '=') {
                 if (m_next2 == '=') {
                     shift3();
                     token = STRNEQ;
                     break;
                 }
                 shift2();
                 token = NE;
                 break;
             }
             shift1();
             token = '!';
             break;
         case '<':
             if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
                 // <!-- marks the beginning of a line comment (for www usage)
                 shift4();
                 goto inSingleLineComment;
             }
             if (m_next1 == '<') {
                 if (m_next2 == '=') {
                     shift3();
                     token = LSHIFTEQUAL;
                     break;
                 }
                 shift2();
                 token = LSHIFT;
                 break;
             }
             if (m_next1 == '=') {
                 shift2();
                 token = LE;
                 break;
             }
             shift1();
             token = '<';
             break;
         case '+':
             if (m_next1 == '+') {
                 shift2();
                 if (m_terminator) {
                     token = AUTOPLUSPLUS;
                     break;
                 }
                 token = PLUSPLUS;
                 break;
             }
             if (m_next1 == '=') {
                 shift2();
                 token = PLUSEQUAL;
                 break;
             }
             shift1();
             token = '+';
             break;
         case '-':
             if (m_next1 == '-') {
                 if (m_atLineStart && m_next2 == '>') {
                     shift3();
                     goto inSingleLineComment;
                 }
                 shift2();
                 if (m_terminator) {
                     token = AUTOMINUSMINUS;
                     break;
                 }
                 token = MINUSMINUS;
                 break;
             }
             if (m_next1 == '=') {
                 shift2();
                 token = MINUSEQUAL;
                 break;
             }
             shift1();
             token = '-';
             break;
         case '*':
             if (m_next1 == '=') {
                 shift2();
                 token = MULTEQUAL;
                 break;
             }
             shift1();
             token = '*';
             break;
         case '/':
             if (m_next1 == '/') {
                 shift2();
                 goto inSingleLineComment;
             }
             if (m_next1 == '*')
                 goto inMultiLineComment;
             if (m_next1 == '=') {
                 shift2();
                 token = DIVEQUAL;
                 break;
             }
             shift1();
             token = '/';
             break;
         case '&':
             if (m_next1 == '&') {
                 shift2();
                 token = AND;
                 break;
             }
             if (m_next1 == '=') {
                 shift2();
                 token = ANDEQUAL;
                 break;
             }
             shift1();
             token = '&';
             break;
         case '^':
             if (m_next1 == '=') {
                 shift2();
                 token = XOREQUAL;
                 break;
             }
             shift1();
             token = '^';
             break;
         case '%':
             if (m_next1 == '=') {
                 shift2();
                 token = MODEQUAL;
                 break;
             }
             shift1();
             token = '%';
             break;
         case '|':
             if (m_next1 == '=') {
                 shift2();
                 token = OREQUAL;
                 break;
             }
             if (m_next1 == '|') {
                 shift2();
                 token = OR;
                 break;
             }
             shift1();
             token = '|';
             break;
         case '.':
             if (isASCIIDigit(m_next1)) {
                 record8('.');
                 shift1();
                 goto inNumberAfterDecimalPoint;
             }
             token = '.';
             shift1();
             break;
         case ',':
         case '~':
         case '?':
         case ':':
         case '(':
         case ')':
         case '[':
         case ']':
             token = m_current;
             shift1();
             break;
         case ';':
             shift1();
             m_delimited = true;
             token = ';';
             break;
         case '{':
             lvalp->intValue = currentOffset();
             shift1();
             token = OPENBRACE;
             break;
         case '}':
             lvalp->intValue = currentOffset();
             shift1();
             m_delimited = true;
             token = CLOSEBRACE;
             break;
         case '\\':
             goto startIdentifierWithBackslash;
         case '0':
             goto startNumberWithZeroDigit;
         case '1':
         case '2':
         case '3':
         case '4':
         case '5':
         case '6':
         case '7':
         case '8':
         case '9':
             goto startNumber;
         case '"':
         case '\'':
             goto startString;
         default:
             if (isIdentStart(m_current))
                 goto startIdentifierOrKeyword;
             if (isLineTerminator(m_current)) {
                 shiftLineTerminator();
                 m_atLineStart = true;
                 m_terminator = true;
                 if (lastTokenWasRestrKeyword()) {
                     token = ';';
                     goto doneSemicolon;
                 }
                 goto start;
             }
             goto returnError;
     }

     m_atLineStart = false;
     goto returnToken;

 startString: {
     int stringQuoteCharacter = m_current;
     shift1();

     const UChar* stringStart = currentCharacter();
     while (m_current != stringQuoteCharacter) {
         // Fast check for characters that require special handling.
         // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
         // as possible, and lets through all common ASCII characters.
         if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
             m_buffer16.append(stringStart, currentCharacter() - stringStart);
             goto inString;
         }
         shift1();
     }
     lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
     shift1();
     m_atLineStart = false;
     m_delimited = false;
     token = STRING;
     goto returnToken;

 inString:
     while (m_current != stringQuoteCharacter) {
         if (m_current == '\\')
             goto inStringEscapeSequence;
         if (UNLIKELY(isLineTerminator(m_current)))
             goto returnError;
         if (UNLIKELY(m_current == -1))
             goto returnError;
         record16(m_current);
         shift1();
     }
     goto doneString;

 inStringEscapeSequence:
     shift1();
     if (m_current == 'x') {
         shift1();
         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
             record16(convertHex(m_current, m_next1));
             shift2();
             goto inString;
         }
         record16('x');
         if (m_current == stringQuoteCharacter)
             goto doneString;
         goto inString;
     }
     if (m_current == 'u') {
         shift1();
         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
             record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
             shift4();
             goto inString;
         }
         if (m_current == stringQuoteCharacter) {
             record16('u');
             goto doneString;
         }
         goto returnError;
     }
     if (isASCIIOctalDigit(m_current)) {
         if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
             record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
             shift3();
             goto inString;
         }
         if (isASCIIOctalDigit(m_next1)) {
             record16((m_current - '0') * 8 + m_next1 - '0');
             shift2();
             goto inString;
         }
         record16(m_current - '0');
         shift1();
         goto inString;
     }
     if (isLineTerminator(m_current)) {
         shiftLineTerminator();
         goto inString;
     }
     record16(singleEscape(m_current));
     shift1();
     goto inString;
 }

 startIdentifierWithBackslash:
     shift1();
     if (UNLIKELY(m_current != 'u'))
         goto returnError;
     shift1();
     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
         goto returnError;
     token = convertUnicode(m_current, m_next1, m_next2, m_next3);
     if (UNLIKELY(!isIdentStart(token)))
         goto returnError;
     goto inIdentifierAfterCharacterCheck;

 startIdentifierOrKeyword: {
     const UChar* identifierStart = currentCharacter();
     shift1();
     while (isIdentPart(m_current))
         shift1();
     if (LIKELY(m_current != '\\')) {
         lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
         goto doneIdentifierOrKeyword;
     }
     m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
 }

     do {
         shift1();
         if (UNLIKELY(m_current != 'u'))
             goto returnError;
         shift1();
         if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
             goto returnError;
         token = convertUnicode(m_current, m_next1, m_next2, m_next3);
         if (UNLIKELY(!isIdentPart(token)))
             goto returnError;
 inIdentifierAfterCharacterCheck:
         record16(token);
         shift4();

         while (isIdentPart(m_current)) {
             record16(m_current);
             shift1();
         }
     } while (UNLIKELY(m_current == '\\'));
     goto doneIdentifier;

 inSingleLineComment:
     while (!isLineTerminator(m_current)) {
         if (UNLIKELY(m_current == -1))
             return 0;
         shift1();
     }
     shiftLineTerminator();
     m_atLineStart = true;
     m_terminator = true;
     if (lastTokenWasRestrKeyword())
         goto doneSemicolon;
     goto start;

 inMultiLineComment:
     shift2();
     while (m_current != '*' || m_next1 != '/') {
         if (isLineTerminator(m_current))
             shiftLineTerminator();
         else {
             shift1();
             if (UNLIKELY(m_current == -1))
                 goto returnError;
         }
     }
     shift2();
     m_atLineStart = false;
     goto start;

 startNumberWithZeroDigit:
     shift1();
     if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
         shift1();
         goto inHex;
     }
     if (m_current == '.') {
         record8('0');
         record8('.');
         shift1();
         goto inNumberAfterDecimalPoint;
     }
     if ((m_current | 0x20) == 'e') {
         record8('0');
         record8('e');
         shift1();
         goto inExponentIndicator;
     }
     if (isASCIIOctalDigit(m_current))
         goto inOctal;
     if (isASCIIDigit(m_current))
         goto startNumber;
     lvalp->doubleValue = 0;
     goto doneNumeric;

 inNumberAfterDecimalPoint:
     while (isASCIIDigit(m_current)) {
         record8(m_current);
         shift1();
     }
     if ((m_current | 0x20) == 'e') {
         record8('e');
         shift1();
         goto inExponentIndicator;
     }
     goto doneNumber;

 inExponentIndicator:
     if (m_current == '+' || m_current == '-') {
         record8(m_current);
         shift1();
     }
     if (!isASCIIDigit(m_current))
         goto returnError;
     do {
         record8(m_current);
         shift1();
     } while (isASCIIDigit(m_current));
     goto doneNumber;

 inOctal: {
     do {
         record8(m_current);
         shift1();
     } while (isASCIIOctalDigit(m_current));
     if (isASCIIDigit(m_current))
         goto startNumber;

     double dval = 0;

     const char* end = m_buffer8.end();
     for (const char* p = m_buffer8.data(); p < end; ++p) {
         dval *= 8;
         dval += *p - '0';
     }
     if (dval >= mantissaOverflowLowerBound)
         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);

     m_buffer8.resize(0);

     lvalp->doubleValue = dval;
     goto doneNumeric;
 }

 inHex: {
     do {
         record8(m_current);
         shift1();
     } while (isASCIIHexDigit(m_current));

     double dval = 0;

     const char* end = m_buffer8.end();
     for (const char* p = m_buffer8.data(); p < end; ++p) {
         dval *= 16;
         dval += toASCIIHexValue(*p);
     }
     if (dval >= mantissaOverflowLowerBound)
         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);

     m_buffer8.resize(0);

     lvalp->doubleValue = dval;
     goto doneNumeric;
 }

 startNumber:
     record8(m_current);
     shift1();
     while (isASCIIDigit(m_current)) {
         record8(m_current);
         shift1();
     }
     if (m_current == '.') {
         record8('.');
         shift1();
         goto inNumberAfterDecimalPoint;
     }
     if ((m_current | 0x20) == 'e') {
         record8('e');
         shift1();
         goto inExponentIndicator;
     }

     // Fall through into doneNumber.

 doneNumber:
     // Null-terminate string for strtod.
     m_buffer8.append('\0');
     lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
     m_buffer8.resize(0);

     // Fall through into doneNumeric.

 doneNumeric:
     // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
     if (UNLIKELY(isIdentStart(m_current)))
         goto returnError;

     m_atLineStart = false;
     m_delimited = false;
     token = NUMBER;
     goto returnToken;

 doneSemicolon:
     token = ';';
     m_delimited = true;
     goto returnToken;

 doneIdentifier:
     m_atLineStart = false;
     m_delimited = false;
     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
     m_buffer16.resize(0);
     token = IDENT;
     goto returnToken;

 doneIdentifierOrKeyword: {
     m_atLineStart = false;
     m_delimited = false;
     m_buffer16.resize(0);
     const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
     token = entry ? entry->lexerValue() : IDENT;
     goto returnToken;
 }

 doneString:
     // Atomize constant strings in case they're later used in property lookup.
     shift1();
     m_atLineStart = false;
     m_delimited = false;
     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
     m_buffer16.resize(0);
     token = STRING;

     // Fall through into returnToken.

 returnToken: {
     int lineNumber = m_lineNumber;
     llocp->first_line = lineNumber;
     llocp->last_line = lineNumber;
     llocp->first_column = startOffset;
     llocp->last_column = currentOffset();

     m_lastToken = token;
     return token;
 }

 returnError:
     m_error = true;
     return -1;
 }

 // Scans a regular expression literal starting at the current character and
 // stores its body in m_pattern and its trailing flags in m_flags.
 // Returns false if a line terminator or the end of input is reached before
 // the closing '/'.
 bool Lexer::scanRegExp()
 {
     ASSERT(m_buffer16.isEmpty());

     bool previousWasBackslash = false;
     bool insideCharacterClass = false;

     // Phase 1: collect the pattern up to the first '/' that is neither escaped
     // nor inside [...].
     for (;;) {
         if (m_current == -1 || isLineTerminator(m_current))
             return false;

         const bool closesPattern = m_current == '/' && !previousWasBackslash && !insideCharacterClass;
         if (closesPattern) {
             m_pattern = UString(m_buffer16);
             m_buffer16.resize(0);
             shift1();
             break;
         }

         // Track unescaped '[' and ']'.
         if (!previousWasBackslash) {
             if (m_current == '[')
                 insideCharacterClass = true;
             else if (m_current == ']')
                 insideCharacterClass = false;
         }

         record16(m_current);
         previousWasBackslash = !previousWasBackslash && m_current == '\\';
         shift1();
     }

     // Phase 2: everything that can continue an identifier is taken as flags.
     for (; isIdentPart(m_current); shift1())
         record16(m_current);

     m_flags = UString(m_buffer16);
     m_buffer16.resize(0);

     return true;
 }

 void Lexer::clear()
 {
     m_identifiers.clear();
     m_codeWithoutBOMs.clear();

     Vector<char> newBuffer8;
     newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
     m_buffer8.swap(newBuffer8);

     Vector<UChar> newBuffer16;
     newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
     m_buffer16.swap(newBuffer16);

     m_isReparsing = false;

     m_pattern = UString();
     m_flags = UString();
 }

 // Builds a SourceCode covering openBrace..closeBrace (inclusive) of the
 // provider's text. When a BOM-stripped copy is in use, both offsets are
 // shifted by the number of BOMs counted in the provider's data before them.
 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
 {
     // No stripped copy: the offsets can be used unchanged.
     if (m_codeWithoutBOMs.isEmpty())
         return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);

     const UChar* data = m_source->provider()->data();

     ASSERT(openBrace < closeBrace);

     int index = m_source->startOffset();

     int bomsBeforeOpenBrace = 0;
     while (index < openBrace) {
         if (data[index] == byteOrderMark)
             ++bomsBeforeOpenBrace;
         ++index;
     }

     int bomsBetweenBraces = 0;
     while (index < closeBrace) {
         if (data[index] == byteOrderMark)
             ++bomsBetweenBraces;
         ++index;
     }

     const int adjustedOpen = openBrace + bomsBeforeOpenBrace;
     const int adjustedEnd = closeBrace + bomsBeforeOpenBrace + bomsBetweenBraces + 1;
     return SourceCode(m_source->provider(), adjustedOpen, adjustedEnd, firstLine);
 }

 } // namespace JSC
	/*
	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
	* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Library General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Library General Public License for more details.
	*
	* You should have received a copy of the GNU Library General Public License
	* along with this library; see the file COPYING.LIB. If not, write to
	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	* Boston, MA 02110-1301, USA.
	*
	*/

	#include "config.h"
	#include "Lexer.h"

	#include "JSFunction.h"
	#include "JSGlobalObjectFunctions.h"
	#include "NodeInfo.h"
	#include "Nodes.h"
	#include "dtoa.h"
	#include <ctype.h>
	#include <limits.h>
	#include <string.h>
	#include <wtf/Assertions.h>

	using namespace WTF;
	using namespace Unicode;

	// We can't specify the namespace in yacc's C output, so do it here instead.
	using namespace JSC;

	#ifndef KDE_USE_FINAL
	#include "Grammar.h"
	#endif

	#include "Lookup.h"
	#include "Lexer.lut.h"

	// A bridge for yacc from the C world to the C++ world.
	int jscyylex(void* lvalp, void* llocp, void* globalData)
	{
	return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
	}

	namespace JSC {

	// U+FEFF: the code unit setCode() searches for and copyCodeWithoutBOMs() drops.
	static const UChar byteOrderMark = 0xFEFF;

	Lexer::Lexer(JSGlobalData* globalData)
	    : m_isReparsing(false)
	    , m_globalData(globalData)
	    , m_keywordTable(JSC::mainTable)
	{
	    // Give both scratch buffers their initial capacity up front.
	    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
	    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
	}

	// Calls deleteTable() on the keyword table that the constructor initialized
	// from JSC::mainTable.
	Lexer::~Lexer()
	{
	m_keywordTable.deleteTable();
	}

	// Returns the address of the character currently held in m_current.
	// m_code always sits four UChars past that character: setCode() primes the
	// window with shift4(), and every shiftN() advances m_code by exactly N.
	inline const UChar* Lexer::currentCharacter() const
	{
	return m_code - 4;
	}

	inline int Lexer::currentOffset() const
	{
	return currentCharacter() - m_codeStart;
	}

	ALWAYS_INLINE void Lexer::shift1()
	{
	    // Slide the four-character window left by one slot...
	    m_current = m_next1;
	    m_next1 = m_next2;
	    m_next2 = m_next3;

	    // ...and refill the last slot, using -1 once the input is exhausted.
	    m_next3 = LIKELY(m_code < m_codeEnd) ? m_code[0] : -1;
	    ++m_code;
	}

	ALWAYS_INLINE void Lexer::shift2()
	{
	    // Advance the window by two characters: the two old lookahead slots move
	    // to the front and the two trailing slots are refilled from the input.
	    m_current = m_next2;
	    m_next1 = m_next3;

	    const UChar* const refill = m_code;
	    m_code += 2;

	    if (UNLIKELY(refill + 1 >= m_codeEnd)) {
	        // Fewer than two characters remain; missing ones become -1.
	        m_next2 = refill < m_codeEnd ? refill[0] : -1;
	        m_next3 = -1;
	        return;
	    }

	    m_next2 = refill[0];
	    m_next3 = refill[1];
	}

	ALWAYS_INLINE void Lexer::shift3()
	{
	    // Advance the window by three characters: only the last lookahead slot
	    // survives; the other three are refilled from the input.
	    m_current = m_next3;

	    const UChar* const refill = m_code;
	    m_code += 3;

	    if (UNLIKELY(refill + 2 >= m_codeEnd)) {
	        // Fewer than three characters remain; missing ones become -1.
	        m_next1 = refill < m_codeEnd ? refill[0] : -1;
	        m_next2 = refill + 1 < m_codeEnd ? refill[1] : -1;
	        m_next3 = -1;
	        return;
	    }

	    m_next1 = refill[0];
	    m_next2 = refill[1];
	    m_next3 = refill[2];
	}

	ALWAYS_INLINE void Lexer::shift4()
	{
	    // Replace the whole four-character window with the next four input
	    // characters; positions past the end of the input read as -1.
	    const UChar* const refill = m_code;
	    m_code += 4;

	    if (UNLIKELY(refill + 3 >= m_codeEnd)) {
	        m_current = refill < m_codeEnd ? refill[0] : -1;
	        m_next1 = refill + 1 < m_codeEnd ? refill[1] : -1;
	        m_next2 = refill + 2 < m_codeEnd ? refill[2] : -1;
	        m_next3 = -1;
	        return;
	    }

	    m_current = refill[0];
	    m_next1 = refill[1];
	    m_next2 = refill[2];
	    m_next3 = refill[3];
	}

	// Points the lexer at `source` and primes the four-character lookahead window.
	//
	// Resets the line number, m_delimited, m_lastToken, m_atLineStart and the
	// error flag. The address of `source` is kept in m_source, so the SourceCode
	// object has to stay alive for as long as the lexer refers to it.
	void Lexer::setCode(const SourceCode& source)
	{
	    m_lineNumber = source.firstLine();
	    m_delimited = false;
	    m_lastToken = -1;

	    const UChar* data = source.provider()->data();

	    m_source = &source;
	    m_codeStart = data;
	    m_code = data + source.startOffset();
	    m_codeEnd = data + source.endOffset();
	    m_error = false;
	    m_atLineStart = true;

	    // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
	    // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
	    // Only [m_code, m_codeEnd) is ever read by the shift functions, so only that
	    // range is searched; a BOM located before startOffset needs no stripped copy.
	    if (source.provider()->hasBOMs()) {
	        for (const UChar* p = m_code; p < m_codeEnd; ++p) {
	            if (UNLIKELY(*p == byteOrderMark)) {
	                copyCodeWithoutBOMs();
	                break;
	            }
	        }
	    }

	    // Read the first characters into the 4-character buffer.
	    shift4();
	    ASSERT(currentOffset() == source.startOffset());
	}

	void Lexer::copyCodeWithoutBOMs()
	{
	// Note: In this case, the character offset data for debugging will be incorrect.
	// If it's important to correctly debug code with extraneous BOMs, then the caller
	// should strip the BOMs when creating the SourceProvider object and do its own
	// mapping of offsets within the stripped text to original text offset.

	m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
	for (const UChar* p = m_code; p < m_codeEnd; ++p) {
	UChar c = *p;
	if (c != byteOrderMark)
	m_codeWithoutBOMs.append(c);
	}
	ptrdiff_t startDelta = m_codeStart - m_code;
	m_code = m_codeWithoutBOMs.data();
	m_codeStart = m_code + startDelta;
	m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
	}

	void Lexer::shiftLineTerminator()
	{
	    ASSERT(isLineTerminator(m_current));

	    // A '\n' next to a '\r' (in either order) counts as one line break; the sum
	    // test recognises both CRLF and LFCR with a single comparison.
	    const bool isTwoCharacterBreak = m_current + m_next1 == '\n' + '\r';
	    if (isTwoCharacterBreak)
	        shift2();
	    else
	        shift1();

	    m_lineNumber += 1;
	}

	// Creates an Identifier from `length` characters starting at `characters`,
	// stores it in m_identifiers, and returns a pointer to the stored element.
	ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
	{
	    const Identifier created(m_globalData, characters, length);
	    m_identifiers.append(created);

	    Identifier& stored = m_identifiers.last();
	    return &stored;
	}

	// Returns true when the most recently returned token (m_lastToken) was one of
	// the keywords continue, break, return or throw. lex() uses this to emit a ';'
	// token when a line terminator directly follows one of those keywords.
	inline bool Lexer::lastTokenWasRestrKeyword() const
	{
	    switch (m_lastToken) {
	    case CONTINUE:
	    case BREAK:
	    case RETURN:
	    case THROW:
	        return true;
	    default:
	        return false;
	    }
	}

	// Slow path of isIdentStart(): true when the Unicode category of `c` is one of
	// the letter categories listed in the mask below. Kept out of line so the
	// ASCII fast path stays small.
	static NEVER_INLINE bool isNonASCIIIdentStart(int c)
	{
	    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
	}

	// Returns true when `c` may begin an identifier: for ASCII input that is a
	// letter, '$' or '_'; for anything else the decision is delegated to
	// isNonASCIIIdentStart().
	static inline bool isIdentStart(int c)
	{
	    if (!isASCII(c))
	        return isNonASCIIIdentStart(c);
	    return isASCIIAlpha(c) || c == '$' || c == '_';
	}

	// Slow path of isIdentPart(): true when the Unicode category of `c` is one of
	// the letter, mark, decimal-digit or connector-punctuation categories listed
	// in the mask below. Kept out of line so the ASCII fast path stays small.
	static NEVER_INLINE bool isNonASCIIIdentPart(int c)
	{
	    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
	        | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
	}

	// Returns true when `c` may appear inside an identifier after its first
	// character: for ASCII input that is a letter, a digit, '$' or '_'; for
	// anything else the decision is delegated to isNonASCIIIdentPart().
	static inline bool isIdentPart(int c)
	{
	    if (!isASCII(c))
	        return isNonASCIIIdentPart(c);
	    return isASCIIAlphanumeric(c) || c == '$' || c == '_';
	}

	// Maps the character that follows a backslash in a string literal to the
	// character it denotes. The letters b, t, n, v, f and r map to the control
	// codes 0x08, 0x09, 0x0A, 0x0B, 0x0C and 0x0D; every other value is returned
	// unchanged.
	static inline int singleEscape(int c)
	{
	    if (c == 'b')
	        return 0x08;
	    if (c == 't')
	        return 0x09;
	    if (c == 'n')
	        return 0x0A;
	    if (c == 'v')
	        return 0x0B;
	    if (c == 'f')
	        return 0x0C;
	    if (c == 'r')
	        return 0x0D;
	    return c;
	}

	// Appends `c`, narrowed to a char, to the 8-bit scratch buffer m_buffer8.
	// `c` is asserted to lie in the range [0, 0xFF].
	inline void Lexer::record8(int c)
	{
	    ASSERT(0 <= c);
	    ASSERT(c <= 0xFF);

	    const char narrowed = static_cast<char>(c);
	    m_buffer8.append(narrowed);
	}

	// Appends one UTF-16 code unit to the 16-bit scratch buffer m_buffer16.
	inline void Lexer::record16(UChar c)
	{
	    const UChar unit = c;
	    m_buffer16.append(unit);
	}

	inline void Lexer::record16(int c)
	{
	ASSERT(c >= 0);
	ASSERT(c <= USHRT_MAX);
	record16(UChar(static_cast<unsigned short>(c)));
	}

	int Lexer::lex(void* p1, void* p2)
	{
	ASSERT(!m_error);
	ASSERT(m_buffer8.isEmpty());
	ASSERT(m_buffer16.isEmpty());

	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
	int token = 0;
	m_terminator = false;

	start:
	while (isWhiteSpace(m_current))
	shift1();

	int startOffset = currentOffset();

	if (m_current == -1) {
	if (!m_terminator && !m_delimited && !m_isReparsing) {
	// automatic semicolon insertion if program incomplete
	token = ';';
	goto doneSemicolon;
	}
	return 0;
	}

	m_delimited = false;
	switch (m_current) {
	case '>':
	if (m_next1 == '>' && m_next2 == '>') {
	if (m_next3 == '=') {
	shift4();
	token = URSHIFTEQUAL;
	break;
	}
	shift3();
	token = URSHIFT;
	break;
	}
	if (m_next1 == '>') {
	if (m_next2 == '=') {
	shift3();
	token = RSHIFTEQUAL;
	break;
	}
	shift2();
	token = RSHIFT;
	break;
	}
	if (m_next1 == '=') {
	shift2();
	token = GE;
	break;
	}
	shift1();
	token = '>';
	break;
	case '=':
	if (m_next1 == '=') {
	if (m_next2 == '=') {
	shift3();
	token = STREQ;
	break;
	}
	shift2();
	token = EQEQ;
	break;
	}
	shift1();
	token = '=';
	break;
	case '!':
	if (m_next1 == '=') {
	if (m_next2 == '=') {
	shift3();
	token = STRNEQ;
	break;
	}
	shift2();
	token = NE;
	break;
	}
	shift1();
	token = '!';
	break;
	case '<':
	if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
	// <!-- marks the beginning of a line comment (for www usage)
	shift4();
	goto inSingleLineComment;
	}
	if (m_next1 == '<') {
	if (m_next2 == '=') {
	shift3();
	token = LSHIFTEQUAL;
	break;
	}
	shift2();
	token = LSHIFT;
	break;
	}
	if (m_next1 == '=') {
	shift2();
	token = LE;
	break;
	}
	shift1();
	token = '<';
	break;
	case '+':
	if (m_next1 == '+') {
	shift2();
	if (m_terminator) {
	token = AUTOPLUSPLUS;
	break;
	}
	token = PLUSPLUS;
	break;
	}
	if (m_next1 == '=') {
	shift2();
	token = PLUSEQUAL;
	break;
	}
	shift1();
	token = '+';
	break;
	case '-':
	if (m_next1 == '-') {
	if (m_atLineStart && m_next2 == '>') {
	shift3();
	goto inSingleLineComment;
	}
	shift2();
	if (m_terminator) {
	token = AUTOMINUSMINUS;
	break;
	}
	token = MINUSMINUS;
	break;
	}
	if (m_next1 == '=') {
	shift2();
	token = MINUSEQUAL;
	break;
	}
	shift1();
	token = '-';
	break;
	case '*':
	if (m_next1 == '=') {
	shift2();
	token = MULTEQUAL;
	break;
	}
	shift1();
	token = '*';
	break;
	case '/':
	if (m_next1 == '/') {
	shift2();
	goto inSingleLineComment;
	}
	if (m_next1 == '*')
	goto inMultiLineComment;
	if (m_next1 == '=') {
	shift2();
	token = DIVEQUAL;
	break;
	}
	shift1();
	token = '/';
	break;
	case '&':
	if (m_next1 == '&') {
	shift2();
	token = AND;
	break;
	}
	if (m_next1 == '=') {
	shift2();
	token = ANDEQUAL;
	break;
	}
	shift1();
	token = '&';
	break;
	case '^':
	if (m_next1 == '=') {
	shift2();
	token = XOREQUAL;
	break;
	}
	shift1();
	token = '^';
	break;
	case '%':
	if (m_next1 == '=') {
	shift2();
	token = MODEQUAL;
	break;
	}
	shift1();
	token = '%';
	break;
	case '\|':
	if (m_next1 == '=') {
	shift2();
	token = OREQUAL;
	break;
	}
	if (m_next1 == '\|') {
	shift2();
	token = OR;
	break;
	}
	shift1();
	token = '\|';
	break;
	case '.':
	if (isASCIIDigit(m_next1)) {
	record8('.');
	shift1();
	goto inNumberAfterDecimalPoint;
	}
	token = '.';
	shift1();
	break;
	case ',':
	case '~':
	case '?':
	case ':':
	case '(':
	case ')':
	case '[':
	case ']':
	token = m_current;
	shift1();
	break;
	case ';':
	shift1();
	m_delimited = true;
	token = ';';
	break;
	case '{':
	lvalp->intValue = currentOffset();
	shift1();
	token = OPENBRACE;
	break;
	case '}':
	lvalp->intValue = currentOffset();
	shift1();
	m_delimited = true;
	token = CLOSEBRACE;
	break;
	case '\\':
	goto startIdentifierWithBackslash;
	case '0':
	goto startNumberWithZeroDigit;
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	goto startNumber;
	case '"':
	case '\'':
	goto startString;
	default:
	if (isIdentStart(m_current))
	goto startIdentifierOrKeyword;
	if (isLineTerminator(m_current)) {
	shiftLineTerminator();
	m_atLineStart = true;
	m_terminator = true;
	if (lastTokenWasRestrKeyword()) {
	token = ';';
	goto doneSemicolon;
	}
	goto start;
	}
	goto returnError;
	}

	m_atLineStart = false;
	goto returnToken;

	startString: {
	int stringQuoteCharacter = m_current;
	shift1();

	const UChar* stringStart = currentCharacter();
	while (m_current != stringQuoteCharacter) {
	// Fast check for characters that require special handling.
	// Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
	// as possible, and lets through all common ASCII characters.
	if (UNLIKELY(m_current == '\\') \|\| UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
	m_buffer16.append(stringStart, currentCharacter() - stringStart);
	goto inString;
	}
	shift1();
	}
	lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
	shift1();
	m_atLineStart = false;
	m_delimited = false;
	token = STRING;
	goto returnToken;

	inString:
	while (m_current != stringQuoteCharacter) {
	if (m_current == '\\')
	goto inStringEscapeSequence;
	if (UNLIKELY(isLineTerminator(m_current)))
	goto returnError;
	if (UNLIKELY(m_current == -1))
	goto returnError;
	record16(m_current);
	shift1();
	}
	goto doneString;

	inStringEscapeSequence:
	shift1();
	if (m_current == 'x') {
	shift1();
	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
	record16(convertHex(m_current, m_next1));
	shift2();
	goto inString;
	}
	record16('x');
	if (m_current == stringQuoteCharacter)
	goto doneString;
	goto inString;
	}
	if (m_current == 'u') {
	shift1();
	if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
	record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
	shift4();
	goto inString;
	}
	if (m_current == stringQuoteCharacter) {
	record16('u');
	goto doneString;
	}
	goto returnError;
	}
	if (isASCIIOctalDigit(m_current)) {
	if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
	record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
	shift3();
	goto inString;
	}
	if (isASCIIOctalDigit(m_next1)) {
	record16((m_current - '0') * 8 + m_next1 - '0');
	shift2();
	goto inString;
	}
	record16(m_current - '0');
	shift1();
	goto inString;
	}
	if (isLineTerminator(m_current)) {
	shiftLineTerminator();
	goto inString;
	}
	record16(singleEscape(m_current));
	shift1();
	goto inString;
	}

	startIdentifierWithBackslash:
	shift1();
	if (UNLIKELY(m_current != 'u'))
	goto returnError;
	shift1();
	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
	goto returnError;
	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
	if (UNLIKELY(!isIdentStart(token)))
	goto returnError;
	goto inIdentifierAfterCharacterCheck;

	startIdentifierOrKeyword: {
	const UChar* identifierStart = currentCharacter();
	shift1();
	while (isIdentPart(m_current))
	shift1();
	if (LIKELY(m_current != '\\')) {
	lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
	goto doneIdentifierOrKeyword;
	}
	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
	}

	do {
	shift1();
	if (UNLIKELY(m_current != 'u'))
	goto returnError;
	shift1();
	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
	goto returnError;
	token = convertUnicode(m_current, m_next1, m_next2, m_next3);
	if (UNLIKELY(!isIdentPart(token)))
	goto returnError;
	inIdentifierAfterCharacterCheck:
	record16(token);
	shift4();

	while (isIdentPart(m_current)) {
	record16(m_current);
	shift1();
	}
	} while (UNLIKELY(m_current == '\\'));
	goto doneIdentifier;

	inSingleLineComment:
	while (!isLineTerminator(m_current)) {
	if (UNLIKELY(m_current == -1))
	return 0;
	shift1();
	}
	shiftLineTerminator();
	m_atLineStart = true;
	m_terminator = true;
	if (lastTokenWasRestrKeyword())
	goto doneSemicolon;
	goto start;

	inMultiLineComment:
	shift2();
	while (m_current != '*' \|\| m_next1 != '/') {
	if (isLineTerminator(m_current))
	shiftLineTerminator();
	else {
	shift1();
	if (UNLIKELY(m_current == -1))
	goto returnError;
	}
	}
	shift2();
	m_atLineStart = false;
	goto start;

	startNumberWithZeroDigit:
	shift1();
	if ((m_current \| 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
	shift1();
	goto inHex;
	}
	if (m_current == '.') {
	record8('0');
	record8('.');
	shift1();
	goto inNumberAfterDecimalPoint;
	}
	if ((m_current \| 0x20) == 'e') {
	record8('0');
	record8('e');
	shift1();
	goto inExponentIndicator;
	}
	if (isASCIIOctalDigit(m_current))
	goto inOctal;
	if (isASCIIDigit(m_current))
	goto startNumber;
	lvalp->doubleValue = 0;
	goto doneNumeric;

	inNumberAfterDecimalPoint:
	while (isASCIIDigit(m_current)) {
	record8(m_current);
	shift1();
	}
	if ((m_current \| 0x20) == 'e') {
	record8('e');
	shift1();
	goto inExponentIndicator;
	}
	goto doneNumber;

	inExponentIndicator:
	if (m_current == '+' \|\| m_current == '-') {
	record8(m_current);
	shift1();
	}
	if (!isASCIIDigit(m_current))
	goto returnError;
	do {
	record8(m_current);
	shift1();
	} while (isASCIIDigit(m_current));
	goto doneNumber;

	inOctal: {
	do {
	record8(m_current);
	shift1();
	} while (isASCIIOctalDigit(m_current));
	if (isASCIIDigit(m_current))
	goto startNumber;

	double dval = 0;

	const char* end = m_buffer8.end();
	for (const char* p = m_buffer8.data(); p < end; ++p) {
	dval *= 8;
	dval += *p - '0';
	}
	if (dval >= mantissaOverflowLowerBound)
	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);

	m_buffer8.resize(0);

	lvalp->doubleValue = dval;
	goto doneNumeric;
	}

	inHex: {
	do {
	record8(m_current);
	shift1();
	} while (isASCIIHexDigit(m_current));

	double dval = 0;

	const char* end = m_buffer8.end();
	for (const char* p = m_buffer8.data(); p < end; ++p) {
	dval *= 16;
	dval += toASCIIHexValue(*p);
	}
	if (dval >= mantissaOverflowLowerBound)
	dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);

	m_buffer8.resize(0);

	lvalp->doubleValue = dval;
	goto doneNumeric;
	}

	startNumber:
	record8(m_current);
	shift1();
	while (isASCIIDigit(m_current)) {
	record8(m_current);
	shift1();
	}
	if (m_current == '.') {
	record8('.');
	shift1();
	goto inNumberAfterDecimalPoint;
	}
	if ((m_current \| 0x20) == 'e') {
	record8('e');
	shift1();
	goto inExponentIndicator;
	}

	// Fall through into doneNumber.

	doneNumber:
	// Null-terminate string for strtod.
	m_buffer8.append('\0');
	lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
	m_buffer8.resize(0);

	// Fall through into doneNumeric.

	doneNumeric:
	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
	if (UNLIKELY(isIdentStart(m_current)))
	goto returnError;

	m_atLineStart = false;
	m_delimited = false;
	token = NUMBER;
	goto returnToken;

	doneSemicolon:
	token = ';';
	m_delimited = true;
	goto returnToken;

	doneIdentifier:
	m_atLineStart = false;
	m_delimited = false;
	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
	m_buffer16.resize(0);
	token = IDENT;
	goto returnToken;

	doneIdentifierOrKeyword: {
	m_atLineStart = false;
	m_delimited = false;
	m_buffer16.resize(0);
	const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
	token = entry ? entry->lexerValue() : IDENT;
	goto returnToken;
	}

	doneString:
	// Atomize constant strings in case they're later used in property lookup.
	shift1();
	m_atLineStart = false;
	m_delimited = false;
	lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
	m_buffer16.resize(0);
	token = STRING;

	// Fall through into returnToken.

	returnToken: {
	int lineNumber = m_lineNumber;
	llocp->first_line = lineNumber;
	llocp->last_line = lineNumber;
	llocp->first_column = startOffset;
	llocp->last_column = currentOffset();

	m_lastToken = token;
	return token;
	}

	returnError:
	m_error = true;
	return -1;
	}

	// Scans the remainder of a regular expression literal starting at the current
	// character, i.e. just past the opening '/'.
	//
	// On success the pattern text (without the closing '/') is stored in m_pattern,
	// the trailing identifier characters are stored in m_flags, and true is
	// returned. If a line terminator or the end of input is reached before the
	// closing '/', false is returned and m_buffer16 is emptied again so that the
	// "buffer is empty" assertions made on entry to lex() and scanRegExp() keep
	// holding.
	bool Lexer::scanRegExp()
	{
	    ASSERT(m_buffer16.isEmpty());

	    bool lastWasEscape = false;
	    bool inBrackets = false;

	    while (true) {
	        if (isLineTerminator(m_current) || m_current == -1) {
	            // Unterminated literal: discard the partially recorded pattern.
	            m_buffer16.resize(0);
	            return false;
	        }
	        // A '/' only terminates the pattern when it is neither escaped nor
	        // inside a [...] character class.
	        if (m_current != '/' || lastWasEscape || inBrackets) {
	            // keep track of '[' and ']'
	            if (!lastWasEscape) {
	                if (m_current == '[' && !inBrackets)
	                    inBrackets = true;
	                if (m_current == ']' && inBrackets)
	                    inBrackets = false;
	            }
	            record16(m_current);
	            // A backslash escapes the next character unless it is itself escaped.
	            lastWasEscape = !lastWasEscape && m_current == '\\';
	        } else { // end of regexp
	            m_pattern = UString(m_buffer16);
	            m_buffer16.resize(0);
	            shift1();
	            break;
	        }
	        shift1();
	    }

	    // Everything that can be part of an identifier after the closing '/' is
	    // collected as the flags string.
	    while (isIdentPart(m_current)) {
	        record16(m_current);
	        shift1();
	    }
	    m_flags = UString(m_buffer16);
	    m_buffer16.resize(0);

	    return true;
	}

	void Lexer::clear()
	{
	m_identifiers.clear();
	m_codeWithoutBOMs.clear();

	Vector<char> newBuffer8;
	newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
	m_buffer8.swap(newBuffer8);

	Vector<UChar> newBuffer16;
	newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
	m_buffer16.swap(newBuffer16);

	m_isReparsing = false;

	m_pattern = UString();
	m_flags = UString();
	}

	// Builds a SourceCode covering the text from the character at offset
	// `openBrace` through the character at offset `closeBrace` (inclusive),
	// tagged with `firstLine`.
	//
	// When no BOM-stripped copy of the source is in use, the offsets index the
	// provider's data directly. Otherwise the offsets were measured in the
	// stripped text (copyCodeWithoutBOMs() rebases the lexer onto a copy without
	// BOMs), so they have to be mapped back to positions in the provider's
	// original data: every BOM that precedes a position in the original data
	// pushes that position one further to the right.
	SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
	{
	    if (m_codeWithoutBOMs.isEmpty())
	        return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);

	    ASSERT(openBrace < closeBrace);

	    const UChar* data = m_source->provider()->data();
	    const int end = m_source->endOffset();

	    // Walk the original data from the start of the source. Each BOM found at or
	    // before the (moving) target position shifts the target by one, so the scan
	    // bound grows as BOMs are discovered. Scanning only up to the unadjusted
	    // offset would miss BOMs lying between the unadjusted and the true position.
	    // The `end` guard keeps the scan inside the source range even if the
	    // offsets passed in are inconsistent.
	    int i = m_source->startOffset();
	    for (; i <= openBrace && i < end; ++i) {
	        if (data[i] == byteOrderMark) {
	            // A BOM before the open brace shifts both braces.
	            ++openBrace;
	            ++closeBrace;
	        }
	    }
	    for (; i <= closeBrace && i < end; ++i) {
	        if (data[i] == byteOrderMark)
	            ++closeBrace;
	    }

	    ASSERT(openBrace < closeBrace);

	    return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
	}

	} // namespace JSC