| /* |
| * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
| * Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved. |
| * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| * |
| */ |
| |
| #include "config.h" |
| #include "lexer.h" |
| |
| #include "dtoa.h" |
| #include "JSFunction.h" |
| #include "nodes.h" |
| #include "NodeInfo.h" |
| #include "JSGlobalObjectFunctions.h" |
| #include <ctype.h> |
| #include <limits.h> |
| #include <string.h> |
| #include <wtf/Assertions.h> |
| #include <wtf/unicode/Unicode.h> |
| |
| using namespace WTF; |
| using namespace Unicode; |
| |
| // we can't specify the namespace in yacc's C output, so do it here |
| using namespace JSC; |
| |
| #ifndef KDE_USE_FINAL |
| #include "grammar.h" |
| #endif |
| |
| #include "lookup.h" |
| #include "lexer.lut.h" |
| |
| // a bridge for yacc from the C world to C++ |
| int kjsyylex(void* lvalp, void* llocp, void* globalData) |
| { |
| return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp); |
| } |
| |
| namespace JSC { |
| |
| static bool isDecimalDigit(int); |
| |
| static const size_t initialReadBufferCapacity = 32; |
| static const size_t initialStringTableCapacity = 64; |
| |
| Lexer::Lexer(JSGlobalData* globalData) |
| : yylineno(1) |
| , m_restrKeyword(false) |
| , m_eatNextIdentifier(false) |
| , m_stackToken(-1) |
| , m_lastToken(-1) |
| , m_position(0) |
| , m_code(0) |
| , m_length(0) |
| , m_atLineStart(true) |
| , m_current(0) |
| , m_next1(0) |
| , m_next2(0) |
| , m_next3(0) |
| , m_currentOffset(0) |
| , m_nextOffset1(0) |
| , m_nextOffset2(0) |
| , m_nextOffset3(0) |
| , m_globalData(globalData) |
| , m_mainTable(JSC::mainTable) |
| { |
| m_buffer8.reserveCapacity(initialReadBufferCapacity); |
| m_buffer16.reserveCapacity(initialReadBufferCapacity); |
| m_strings.reserveCapacity(initialStringTableCapacity); |
| m_identifiers.reserveCapacity(initialStringTableCapacity); |
| } |
| |
| Lexer::~Lexer() |
| { |
| m_mainTable.deleteTable(); |
| } |
| |
| void Lexer::setCode(int startingLineNumber, PassRefPtr<SourceProvider> source) |
| { |
| yylineno = startingLineNumber; |
| m_restrKeyword = false; |
| m_delimited = false; |
| m_eatNextIdentifier = false; |
| m_stackToken = -1; |
| m_lastToken = -1; |
| |
| m_position = 0; |
| m_source = source; |
| m_code = m_source->data(); |
| m_length = m_source->length(); |
| m_skipLF = false; |
| m_skipCR = false; |
| m_error = false; |
| m_atLineStart = true; |
| |
| // read first characters |
| shift(4); |
| } |
| |
| void Lexer::shift(unsigned p) |
| { |
| // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM, |
| // see <https://bugs.webkit.org/show_bug.cgi?id=4931>. |
| |
| while (p--) { |
| m_current = m_next1; |
| m_next1 = m_next2; |
| m_next2 = m_next3; |
| m_currentOffset = m_nextOffset1; |
| m_nextOffset1 = m_nextOffset2; |
| m_nextOffset2 = m_nextOffset3; |
| do { |
| if (m_position >= m_length) { |
| m_nextOffset3 = m_position; |
| m_position++; |
| m_next3 = -1; |
| break; |
| } |
| m_nextOffset3 = m_position; |
| m_next3 = m_code[m_position++]; |
| } while (m_next3 == 0xFEFF); |
| } |
| } |
| |
| // called on each new line |
| void Lexer::nextLine() |
| { |
| yylineno++; |
| m_atLineStart = true; |
| } |
| |
| void Lexer::setDone(State s) |
| { |
| m_state = s; |
| m_done = true; |
| } |
| |
| int Lexer::lex(void* p1, void* p2) |
| { |
| YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); |
| YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); |
| int token = 0; |
| m_state = Start; |
| unsigned short stringType = 0; // either single or double quotes |
| m_buffer8.clear(); |
| m_buffer16.clear(); |
| m_done = false; |
| m_terminator = false; |
| m_skipLF = false; |
| m_skipCR = false; |
| |
| // did we push a token on the stack previously ? |
| // (after an automatic semicolon insertion) |
| if (m_stackToken >= 0) { |
| setDone(Other); |
| token = m_stackToken; |
| m_stackToken = 0; |
| } |
| int startOffset = m_currentOffset; |
| while (!m_done) { |
| if (m_skipLF && m_current != '\n') // found \r but not \n afterwards |
| m_skipLF = false; |
| if (m_skipCR && m_current != '\r') // found \n but not \r afterwards |
| m_skipCR = false; |
| if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one |
| m_skipLF = false; |
| m_skipCR = false; |
| shift(1); |
| } |
| switch (m_state) { |
| case Start: |
| startOffset = m_currentOffset; |
| if (isWhiteSpace()) { |
| // do nothing |
| } else if (m_current == '/' && m_next1 == '/') { |
| shift(1); |
| m_state = InSingleLineComment; |
| } else if (m_current == '/' && m_next1 == '*') { |
| shift(1); |
| m_state = InMultiLineComment; |
| } else if (m_current == -1) { |
| if (!m_terminator && !m_delimited) { |
| // automatic semicolon insertion if program incomplete |
| token = ';'; |
| m_stackToken = 0; |
| setDone(Other); |
| } else |
| setDone(Eof); |
| } else if (isLineTerminator()) { |
| nextLine(); |
| m_terminator = true; |
| if (m_restrKeyword) { |
| token = ';'; |
| setDone(Other); |
| } |
| } else if (m_current == '"' || m_current == '\'') { |
| m_state = InString; |
| stringType = static_cast<unsigned short>(m_current); |
| } else if (isIdentStart(m_current)) { |
| record16(m_current); |
| m_state = InIdentifierOrKeyword; |
| } else if (m_current == '\\') |
| m_state = InIdentifierStartUnicodeEscapeStart; |
| else if (m_current == '0') { |
| record8(m_current); |
| m_state = InNum0; |
| } else if (isDecimalDigit(m_current)) { |
| record8(m_current); |
| m_state = InNum; |
| } else if (m_current == '.' && isDecimalDigit(m_next1)) { |
| record8(m_current); |
| m_state = InDecimal; |
| // <!-- marks the beginning of a line comment (for www usage) |
| } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { |
| shift(3); |
| m_state = InSingleLineComment; |
| // same for --> |
| } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') { |
| shift(2); |
| m_state = InSingleLineComment; |
| } else { |
| token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3); |
| if (token != -1) |
| setDone(Other); |
| else |
| setDone(Bad); |
| } |
| break; |
| case InString: |
| if (m_current == stringType) { |
| shift(1); |
| setDone(String); |
| } else if (isLineTerminator() || m_current == -1) |
| setDone(Bad); |
| else if (m_current == '\\') |
| m_state = InEscapeSequence; |
| else |
| record16(m_current); |
| break; |
| // Escape Sequences inside of strings |
| case InEscapeSequence: |
| if (isOctalDigit(m_current)) { |
| if (m_current >= '0' && m_current <= '3' && |
| isOctalDigit(m_next1) && isOctalDigit(m_next2)) { |
| record16(convertOctal(m_current, m_next1, m_next2)); |
| shift(2); |
| m_state = InString; |
| } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) { |
| record16(convertOctal('0', m_current, m_next1)); |
| shift(1); |
| m_state = InString; |
| } else if (isOctalDigit(m_current)) { |
| record16(convertOctal('0', '0', m_current)); |
| m_state = InString; |
| } else |
| setDone(Bad); |
| } else if (m_current == 'x') |
| m_state = InHexEscape; |
| else if (m_current == 'u') |
| m_state = InUnicodeEscape; |
| else if (isLineTerminator()) { |
| nextLine(); |
| m_state = InString; |
| } else { |
| record16(singleEscape(static_cast<unsigned short>(m_current))); |
| m_state = InString; |
| } |
| break; |
| case InHexEscape: |
| if (isHexDigit(m_current) && isHexDigit(m_next1)) { |
| m_state = InString; |
| record16(convertHex(m_current, m_next1)); |
| shift(1); |
| } else if (m_current == stringType) { |
| record16('x'); |
| shift(1); |
| setDone(String); |
| } else { |
| record16('x'); |
| record16(m_current); |
| m_state = InString; |
| } |
| break; |
| case InUnicodeEscape: |
| if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) { |
| record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); |
| shift(3); |
| m_state = InString; |
| } else if (m_current == stringType) { |
| record16('u'); |
| shift(1); |
| setDone(String); |
| } else |
| setDone(Bad); |
| break; |
| case InSingleLineComment: |
| if (isLineTerminator()) { |
| nextLine(); |
| m_terminator = true; |
| if (m_restrKeyword) { |
| token = ';'; |
| setDone(Other); |
| } else |
| m_state = Start; |
| } else if (m_current == -1) |
| setDone(Eof); |
| break; |
| case InMultiLineComment: |
| if (m_current == -1) |
| setDone(Bad); |
| else if (isLineTerminator()) |
| nextLine(); |
| else if (m_current == '*' && m_next1 == '/') { |
| m_state = Start; |
| shift(1); |
| } |
| break; |
| case InIdentifierOrKeyword: |
| case InIdentifier: |
| if (isIdentPart(m_current)) |
| record16(m_current); |
| else if (m_current == '\\') |
| m_state = InIdentifierPartUnicodeEscapeStart; |
| else |
| setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier); |
| break; |
| case InNum0: |
| if (m_current == 'x' || m_current == 'X') { |
| record8(m_current); |
| m_state = InHex; |
| } else if (m_current == '.') { |
| record8(m_current); |
| m_state = InDecimal; |
| } else if (m_current == 'e' || m_current == 'E') { |
| record8(m_current); |
| m_state = InExponentIndicator; |
| } else if (isOctalDigit(m_current)) { |
| record8(m_current); |
| m_state = InOctal; |
| } else if (isDecimalDigit(m_current)) { |
| record8(m_current); |
| m_state = InDecimal; |
| } else |
| setDone(Number); |
| break; |
| case InHex: |
| if (isHexDigit(m_current)) |
| record8(m_current); |
| else |
| setDone(Hex); |
| break; |
| case InOctal: |
| if (isOctalDigit(m_current)) |
| record8(m_current); |
| else if (isDecimalDigit(m_current)) { |
| record8(m_current); |
| m_state = InDecimal; |
| } else |
| setDone(Octal); |
| break; |
| case InNum: |
| if (isDecimalDigit(m_current)) |
| record8(m_current); |
| else if (m_current == '.') { |
| record8(m_current); |
| m_state = InDecimal; |
| } else if (m_current == 'e' || m_current == 'E') { |
| record8(m_current); |
| m_state = InExponentIndicator; |
| } else |
| setDone(Number); |
| break; |
| case InDecimal: |
| if (isDecimalDigit(m_current)) |
| record8(m_current); |
| else if (m_current == 'e' || m_current == 'E') { |
| record8(m_current); |
| m_state = InExponentIndicator; |
| } else |
| setDone(Number); |
| break; |
| case InExponentIndicator: |
| if (m_current == '+' || m_current == '-') |
| record8(m_current); |
| else if (isDecimalDigit(m_current)) { |
| record8(m_current); |
| m_state = InExponent; |
| } else |
| setDone(Bad); |
| break; |
| case InExponent: |
| if (isDecimalDigit(m_current)) |
| record8(m_current); |
| else |
| setDone(Number); |
| break; |
| case InIdentifierStartUnicodeEscapeStart: |
| if (m_current == 'u') |
| m_state = InIdentifierStartUnicodeEscape; |
| else |
| setDone(Bad); |
| break; |
| case InIdentifierPartUnicodeEscapeStart: |
| if (m_current == 'u') |
| m_state = InIdentifierPartUnicodeEscape; |
| else |
| setDone(Bad); |
| break; |
| case InIdentifierStartUnicodeEscape: |
| if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) { |
| setDone(Bad); |
| break; |
| } |
| token = convertUnicode(m_current, m_next1, m_next2, m_next3); |
| shift(3); |
| if (!isIdentStart(token)) { |
| setDone(Bad); |
| break; |
| } |
| record16(token); |
| m_state = InIdentifier; |
| break; |
| case InIdentifierPartUnicodeEscape: |
| if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) { |
| setDone(Bad); |
| break; |
| } |
| token = convertUnicode(m_current, m_next1, m_next2, m_next3); |
| shift(3); |
| if (!isIdentPart(token)) { |
| setDone(Bad); |
| break; |
| } |
| record16(token); |
| m_state = InIdentifier; |
| break; |
| default: |
| ASSERT(!"Unhandled state in switch statement"); |
| } |
| |
| // move on to the next character |
| if (!m_done) |
| shift(1); |
| if (m_state != Start && m_state != InSingleLineComment) |
| m_atLineStart = false; |
| } |
| |
| // no identifiers allowed directly after numeric literal, e.g. "3in" is bad |
| if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current)) |
| m_state = Bad; |
| |
| // terminate string |
| m_buffer8.append('\0'); |
| |
| #ifdef KJS_DEBUG_LEX |
| fprintf(stderr, "line: %d ", lineNo()); |
| fprintf(stderr, "yytext (%x): ", m_buffer8[0]); |
| fprintf(stderr, "%s ", m_buffer8.data()); |
| #endif |
| |
| double dval = 0; |
| if (m_state == Number) |
| dval = strtod(m_buffer8.data(), 0L); |
| else if (m_state == Hex) { // scan hex numbers |
| const char* p = m_buffer8.data() + 2; |
| while (char c = *p++) { |
| dval *= 16; |
| dval += convertHex(c); |
| } |
| |
| if (dval >= mantissaOverflowLowerBound) |
| dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); |
| |
| m_state = Number; |
| } else if (m_state == Octal) { // scan octal number |
| const char* p = m_buffer8.data() + 1; |
| while (char c = *p++) { |
| dval *= 8; |
| dval += c - '0'; |
| } |
| |
| if (dval >= mantissaOverflowLowerBound) |
| dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); |
| |
| m_state = Number; |
| } |
| |
| #ifdef KJS_DEBUG_LEX |
| switch (m_state) { |
| case Eof: |
| printf("(EOF)\n"); |
| break; |
| case Other: |
| printf("(Other)\n"); |
| break; |
| case Identifier: |
| printf("(Identifier)/(Keyword)\n"); |
| break; |
| case String: |
| printf("(String)\n"); |
| break; |
| case Number: |
| printf("(Number)\n"); |
| break; |
| default: |
| printf("(unknown)"); |
| } |
| #endif |
| |
| if (m_state != Identifier) |
| m_eatNextIdentifier = false; |
| |
| m_restrKeyword = false; |
| m_delimited = false; |
| llocp->first_line = yylineno; |
| llocp->last_line = yylineno; |
| llocp->first_column = startOffset; |
| llocp->last_column = m_currentOffset; |
| switch (m_state) { |
| case Eof: |
| token = 0; |
| break; |
| case Other: |
| if (token == '}' || token == ';') |
| m_delimited = true; |
| break; |
| case Identifier: |
| // Apply anonymous-function hack below (eat the identifier). |
| if (m_eatNextIdentifier) { |
| m_eatNextIdentifier = false; |
| token = lex(lvalp, llocp); |
| break; |
| } |
| lvalp->ident = makeIdentifier(m_buffer16); |
| token = IDENT; |
| break; |
| case IdentifierOrKeyword: { |
| lvalp->ident = makeIdentifier(m_buffer16); |
| const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident); |
| if (!entry) { |
| // Lookup for keyword failed, means this is an identifier. |
| token = IDENT; |
| break; |
| } |
| token = entry->integerValue; |
| // Hack for "f = function somename() { ... }"; too hard to get into the grammar. |
| m_eatNextIdentifier = token == FUNCTION && m_lastToken == '='; |
| if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW) |
| m_restrKeyword = true; |
| break; |
| } |
| case String: |
| // Atomize constant strings in case they're later used in property lookup. |
| lvalp->ident = makeIdentifier(m_buffer16); |
| token = STRING; |
| break; |
| case Number: |
| lvalp->doubleValue = dval; |
| token = NUMBER; |
| break; |
| case Bad: |
| #ifdef KJS_DEBUG_LEX |
| fprintf(stderr, "yylex: ERROR.\n"); |
| #endif |
| m_error = true; |
| return -1; |
| default: |
| ASSERT(!"unhandled numeration value in switch"); |
| m_error = true; |
| return -1; |
| } |
| m_lastToken = token; |
| return token; |
| } |
| |
| bool Lexer::isWhiteSpace() const |
| { |
| return m_current == '\t' || m_current == 0x0b || m_current == 0x0c || isSeparatorSpace(m_current); |
| } |
| |
| bool Lexer::isLineTerminator() |
| { |
| bool cr = (m_current == '\r'); |
| bool lf = (m_current == '\n'); |
| if (cr) |
| m_skipLF = true; |
| else if (lf) |
| m_skipCR = true; |
| return cr || lf || m_current == 0x2028 || m_current == 0x2029; |
| } |
| |
| bool Lexer::isIdentStart(int c) |
| { |
| return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)) |
| || c == '$' || c == '_'; |
| } |
| |
| bool Lexer::isIdentPart(int c) |
| { |
| return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other |
| | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) |
| || c == '$' || c == '_'; |
| } |
| |
| static bool isDecimalDigit(int c) |
| { |
| return (c >= '0' && c <= '9'); |
| } |
| |
| bool Lexer::isHexDigit(int c) |
| { |
| return (c >= '0' && c <= '9' |
| || c >= 'a' && c <= 'f' |
| || c >= 'A' && c <= 'F'); |
| } |
| |
| bool Lexer::isOctalDigit(int c) |
| { |
| return (c >= '0' && c <= '7'); |
| } |
| |
| int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4) |
| { |
| if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { |
| shift(4); |
| return URSHIFTEQUAL; |
| } |
| if (c1 == '=' && c2 == '=' && c3 == '=') { |
| shift(3); |
| return STREQ; |
| } |
| if (c1 == '!' && c2 == '=' && c3 == '=') { |
| shift(3); |
| return STRNEQ; |
| } |
| if (c1 == '>' && c2 == '>' && c3 == '>') { |
| shift(3); |
| return URSHIFT; |
| } |
| if (c1 == '<' && c2 == '<' && c3 == '=') { |
| shift(3); |
| return LSHIFTEQUAL; |
| } |
| if (c1 == '>' && c2 == '>' && c3 == '=') { |
| shift(3); |
| return RSHIFTEQUAL; |
| } |
| if (c1 == '<' && c2 == '=') { |
| shift(2); |
| return LE; |
| } |
| if (c1 == '>' && c2 == '=') { |
| shift(2); |
| return GE; |
| } |
| if (c1 == '!' && c2 == '=') { |
| shift(2); |
| return NE; |
| } |
| if (c1 == '+' && c2 == '+') { |
| shift(2); |
| if (m_terminator) |
| return AUTOPLUSPLUS; |
| return PLUSPLUS; |
| } |
| if (c1 == '-' && c2 == '-') { |
| shift(2); |
| if (m_terminator) |
| return AUTOMINUSMINUS; |
| return MINUSMINUS; |
| } |
| if (c1 == '=' && c2 == '=') { |
| shift(2); |
| return EQEQ; |
| } |
| if (c1 == '+' && c2 == '=') { |
| shift(2); |
| return PLUSEQUAL; |
| } |
| if (c1 == '-' && c2 == '=') { |
| shift(2); |
| return MINUSEQUAL; |
| } |
| if (c1 == '*' && c2 == '=') { |
| shift(2); |
| return MULTEQUAL; |
| } |
| if (c1 == '/' && c2 == '=') { |
| shift(2); |
| return DIVEQUAL; |
| } |
| if (c1 == '&' && c2 == '=') { |
| shift(2); |
| return ANDEQUAL; |
| } |
| if (c1 == '^' && c2 == '=') { |
| shift(2); |
| return XOREQUAL; |
| } |
| if (c1 == '%' && c2 == '=') { |
| shift(2); |
| return MODEQUAL; |
| } |
| if (c1 == '|' && c2 == '=') { |
| shift(2); |
| return OREQUAL; |
| } |
| if (c1 == '<' && c2 == '<') { |
| shift(2); |
| return LSHIFT; |
| } |
| if (c1 == '>' && c2 == '>') { |
| shift(2); |
| return RSHIFT; |
| } |
| if (c1 == '&' && c2 == '&') { |
| shift(2); |
| return AND; |
| } |
| if (c1 == '|' && c2 == '|') { |
| shift(2); |
| return OR; |
| } |
| |
| switch (c1) { |
| case '=': |
| case '>': |
| case '<': |
| case ',': |
| case '!': |
| case '~': |
| case '?': |
| case ':': |
| case '.': |
| case '+': |
| case '-': |
| case '*': |
| case '/': |
| case '&': |
| case '|': |
| case '^': |
| case '%': |
| case '(': |
| case ')': |
| case '[': |
| case ']': |
| case ';': |
| shift(1); |
| return static_cast<int>(c1); |
| case '{': |
| charPos = m_position - 4; |
| shift(1); |
| return OPENBRACE; |
| case '}': |
| charPos = m_position - 4; |
| shift(1); |
| return CLOSEBRACE; |
| default: |
| return -1; |
| } |
| } |
| |
| unsigned short Lexer::singleEscape(unsigned short c) |
| { |
| switch (c) { |
| case 'b': |
| return 0x08; |
| case 't': |
| return 0x09; |
| case 'n': |
| return 0x0A; |
| case 'v': |
| return 0x0B; |
| case 'f': |
| return 0x0C; |
| case 'r': |
| return 0x0D; |
| case '"': |
| return 0x22; |
| case '\'': |
| return 0x27; |
| case '\\': |
| return 0x5C; |
| default: |
| return c; |
| } |
| } |
| |
| unsigned short Lexer::convertOctal(int c1, int c2, int c3) |
| { |
| return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); |
| } |
| |
| unsigned char Lexer::convertHex(int c) |
| { |
| if (c >= '0' && c <= '9') |
| return static_cast<unsigned char>(c - '0'); |
| if (c >= 'a' && c <= 'f') |
| return static_cast<unsigned char>(c - 'a' + 10); |
| return static_cast<unsigned char>(c - 'A' + 10); |
| } |
| |
| unsigned char Lexer::convertHex(int c1, int c2) |
| { |
| return ((convertHex(c1) << 4) + convertHex(c2)); |
| } |
| |
| UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) |
| { |
| unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2); |
| unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4); |
| return (highByte << 8 | lowByte); |
| } |
| |
| void Lexer::record8(int c) |
| { |
| ASSERT(c >= 0); |
| ASSERT(c <= 0xff); |
| m_buffer8.append(static_cast<char>(c)); |
| } |
| |
| void Lexer::record16(int c) |
| { |
| ASSERT(c >= 0); |
| ASSERT(c <= USHRT_MAX); |
| record16(UChar(static_cast<unsigned short>(c))); |
| } |
| |
| void Lexer::record16(UChar c) |
| { |
| m_buffer16.append(c); |
| } |
| |
| bool Lexer::scanRegExp() |
| { |
| m_buffer16.clear(); |
| bool lastWasEscape = false; |
| bool inBrackets = false; |
| |
| while (1) { |
| if (isLineTerminator() || m_current == -1) |
| return false; |
| else if (m_current != '/' || lastWasEscape == true || inBrackets == true) { |
| // keep track of '[' and ']' |
| if (!lastWasEscape) { |
| if ( m_current == '[' && !inBrackets ) |
| inBrackets = true; |
| if ( m_current == ']' && inBrackets ) |
| inBrackets = false; |
| } |
| record16(m_current); |
| lastWasEscape = |
| !lastWasEscape && (m_current == '\\'); |
| } else { // end of regexp |
| m_pattern = UString(m_buffer16); |
| m_buffer16.clear(); |
| shift(1); |
| break; |
| } |
| shift(1); |
| } |
| |
| while (isIdentPart(m_current)) { |
| record16(m_current); |
| shift(1); |
| } |
| m_flags = UString(m_buffer16); |
| |
| return true; |
| } |
| |
| void Lexer::clear() |
| { |
| deleteAllValues(m_strings); |
| Vector<UString*> newStrings; |
| newStrings.reserveCapacity(initialStringTableCapacity); |
| m_strings.swap(newStrings); |
| |
| deleteAllValues(m_identifiers); |
| Vector<JSC::Identifier*> newIdentifiers; |
| newIdentifiers.reserveCapacity(initialStringTableCapacity); |
| m_identifiers.swap(newIdentifiers); |
| |
| Vector<char> newBuffer8; |
| newBuffer8.reserveCapacity(initialReadBufferCapacity); |
| m_buffer8.swap(newBuffer8); |
| |
| Vector<UChar> newBuffer16; |
| newBuffer16.reserveCapacity(initialReadBufferCapacity); |
| m_buffer16.swap(newBuffer16); |
| |
| m_pattern = 0; |
| m_flags = 0; |
| } |
| |
| Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer) |
| { |
| JSC::Identifier* identifier = new JSC::Identifier(m_globalData, buffer.data(), buffer.size()); |
| m_identifiers.append(identifier); |
| return identifier; |
| } |
| |
| } // namespace JSC |