| /* |
| * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
| * Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved. |
| * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| * |
| */ |
| |
| #include "config.h" |
| #include "lexer.h" |
| |
| #include "dtoa.h" |
| #include "function.h" |
| #include "nodes.h" |
| #include "NodeInfo.h" |
| #include <ctype.h> |
| #include <limits.h> |
| #include <string.h> |
| #include <wtf/Assertions.h> |
| #include <wtf/unicode/Unicode.h> |
| |
| #if USE(MULTIPLE_THREADS) |
| #include <wtf/ThreadSpecific.h> |
| #endif |
| |
| using namespace WTF; |
| using namespace Unicode; |
| |
| // we can't specify the namespace in yacc's C output, so do it here |
| using namespace KJS; |
| |
| #ifndef KDE_USE_FINAL |
| #include "grammar.h" |
| #endif |
| |
| #include "lookup.h" |
| #include "lexer.lut.h" |
| |
| // a bridge for yacc from the C world to C++ |
| int kjsyylex(void* lvalp, void* llocp, void* lexer) |
| { |
| return static_cast<Lexer*>(lexer)->lex(lvalp, llocp); |
| } |
| |
| namespace KJS { |
| |
// Capacity reserved up front for the 8-bit and 16-bit token buffers.
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the string and identifier tables.
static const size_t initialStringTableCapacity = 64;

// Defined further down in this file; declared here so lex() can use it.
static bool isDecimalDigit(int);
| |
| Lexer& lexer() |
| { |
| #if USE(MULTIPLE_THREADS) |
| static ThreadSpecific<Lexer> staticLexer; |
| return *staticLexer; |
| #else |
| static Lexer staticLexer; |
| return staticLexer; |
| #endif |
| } |
| |
| Lexer::Lexer() |
| : yylineno(1) |
| , restrKeyword(false) |
| , eatNextIdentifier(false) |
| , stackToken(-1) |
| , lastToken(-1) |
| , pos(0) |
| , code(0) |
| , length(0) |
| , atLineStart(true) |
| , current(0) |
| , next1(0) |
| , next2(0) |
| , next3(0) |
| , mainTable(KJS::mainTable) |
| { |
| m_buffer8.reserveCapacity(initialReadBufferCapacity); |
| m_buffer16.reserveCapacity(initialReadBufferCapacity); |
| m_strings.reserveCapacity(initialStringTableCapacity); |
| m_identifiers.reserveCapacity(initialStringTableCapacity); |
| } |
| |
| Lexer::~Lexer() |
| { |
| delete[] mainTable.table; |
| } |
| |
| void Lexer::setCode(int startingLineNumber, PassRefPtr<SourceProvider> source) |
| { |
| yylineno = 1 + startingLineNumber; |
| restrKeyword = false; |
| delimited = false; |
| eatNextIdentifier = false; |
| stackToken = -1; |
| lastToken = -1; |
| |
| pos = 0; |
| m_source = source; |
| code = m_source->data(); |
| length = m_source->length(); |
| skipLF = false; |
| skipCR = false; |
| error = false; |
| atLineStart = true; |
| |
| // read first characters |
| shift(4); |
| } |
| |
| void Lexer::shift(unsigned p) |
| { |
| // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM, |
| // see <https://bugs.webkit.org/show_bug.cgi?id=4931>. |
| |
| while (p--) { |
| current = next1; |
| next1 = next2; |
| next2 = next3; |
| do { |
| if (pos >= length) { |
| pos++; |
| next3 = -1; |
| break; |
| } |
| next3 = code[pos++]; |
| } while (next3 == 0xFEFF); |
| } |
| } |
| |
| // called on each new line |
| void Lexer::nextLine() |
| { |
| yylineno++; |
| atLineStart = true; |
| } |
| |
| void Lexer::setDone(State s) |
| { |
| state = s; |
| done = true; |
| } |
| |
| int Lexer::lex(void* p1, void* p2) |
| { |
| YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); |
| YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); |
| int token = 0; |
| state = Start; |
| unsigned short stringType = 0; // either single or double quotes |
| m_buffer8.clear(); |
| m_buffer16.clear(); |
| done = false; |
| terminator = false; |
| skipLF = false; |
| skipCR = false; |
| |
| // did we push a token on the stack previously ? |
| // (after an automatic semicolon insertion) |
| if (stackToken >= 0) { |
| setDone(Other); |
| token = stackToken; |
| stackToken = 0; |
| } |
| |
| while (!done) { |
| if (skipLF && current != '\n') // found \r but not \n afterwards |
| skipLF = false; |
| if (skipCR && current != '\r') // found \n but not \r afterwards |
| skipCR = false; |
| if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one |
| { |
| skipLF = false; |
| skipCR = false; |
| shift(1); |
| } |
| switch (state) { |
| case Start: |
| if (isWhiteSpace()) { |
| // do nothing |
| } else if (current == '/' && next1 == '/') { |
| shift(1); |
| state = InSingleLineComment; |
| } else if (current == '/' && next1 == '*') { |
| shift(1); |
| state = InMultiLineComment; |
| } else if (current == -1) { |
| if (!terminator && !delimited) { |
| // automatic semicolon insertion if program incomplete |
| token = ';'; |
| stackToken = 0; |
| setDone(Other); |
| } else |
| setDone(Eof); |
| } else if (isLineTerminator()) { |
| nextLine(); |
| terminator = true; |
| if (restrKeyword) { |
| token = ';'; |
| setDone(Other); |
| } |
| } else if (current == '"' || current == '\'') { |
| state = InString; |
| stringType = static_cast<unsigned short>(current); |
| } else if (isIdentStart(current)) { |
| record16(current); |
| state = InIdentifierOrKeyword; |
| } else if (current == '\\') { |
| state = InIdentifierStartUnicodeEscapeStart; |
| } else if (current == '0') { |
| record8(current); |
| state = InNum0; |
| } else if (isDecimalDigit(current)) { |
| record8(current); |
| state = InNum; |
| } else if (current == '.' && isDecimalDigit(next1)) { |
| record8(current); |
| state = InDecimal; |
| // <!-- marks the beginning of a line comment (for www usage) |
| } else if (current == '<' && next1 == '!' && |
| next2 == '-' && next3 == '-') { |
| shift(3); |
| state = InSingleLineComment; |
| // same for --> |
| } else if (atLineStart && current == '-' && next1 == '-' && next2 == '>') { |
| shift(2); |
| state = InSingleLineComment; |
| } else { |
| token = matchPunctuator(lvalp->intValue, current, next1, next2, next3); |
| if (token != -1) { |
| setDone(Other); |
| } else { |
| // cerr << "encountered unknown character" << endl; |
| setDone(Bad); |
| } |
| } |
| break; |
| case InString: |
| if (current == stringType) { |
| shift(1); |
| setDone(String); |
| } else if (isLineTerminator() || current == -1) { |
| setDone(Bad); |
| } else if (current == '\\') { |
| state = InEscapeSequence; |
| } else { |
| record16(current); |
| } |
| break; |
| // Escape Sequences inside of strings |
| case InEscapeSequence: |
| if (isOctalDigit(current)) { |
| if (current >= '0' && current <= '3' && |
| isOctalDigit(next1) && isOctalDigit(next2)) { |
| record16(convertOctal(current, next1, next2)); |
| shift(2); |
| state = InString; |
| } else if (isOctalDigit(current) && isOctalDigit(next1)) { |
| record16(convertOctal('0', current, next1)); |
| shift(1); |
| state = InString; |
| } else if (isOctalDigit(current)) { |
| record16(convertOctal('0', '0', current)); |
| state = InString; |
| } else { |
| setDone(Bad); |
| } |
| } else if (current == 'x') |
| state = InHexEscape; |
| else if (current == 'u') |
| state = InUnicodeEscape; |
| else if (isLineTerminator()) { |
| nextLine(); |
| state = InString; |
| } else { |
| record16(singleEscape(static_cast<unsigned short>(current))); |
| state = InString; |
| } |
| break; |
| case InHexEscape: |
| if (isHexDigit(current) && isHexDigit(next1)) { |
| state = InString; |
| record16(convertHex(current, next1)); |
| shift(1); |
| } else if (current == stringType) { |
| record16('x'); |
| shift(1); |
| setDone(String); |
| } else { |
| record16('x'); |
| record16(current); |
| state = InString; |
| } |
| break; |
| case InUnicodeEscape: |
| if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { |
| record16(convertUnicode(current, next1, next2, next3)); |
| shift(3); |
| state = InString; |
| } else if (current == stringType) { |
| record16('u'); |
| shift(1); |
| setDone(String); |
| } else { |
| setDone(Bad); |
| } |
| break; |
| case InSingleLineComment: |
| if (isLineTerminator()) { |
| nextLine(); |
| terminator = true; |
| if (restrKeyword) { |
| token = ';'; |
| setDone(Other); |
| } else |
| state = Start; |
| } else if (current == -1) { |
| setDone(Eof); |
| } |
| break; |
| case InMultiLineComment: |
| if (current == -1) { |
| setDone(Bad); |
| } else if (isLineTerminator()) { |
| nextLine(); |
| } else if (current == '*' && next1 == '/') { |
| state = Start; |
| shift(1); |
| } |
| break; |
| case InIdentifierOrKeyword: |
| case InIdentifier: |
| if (isIdentPart(current)) |
| record16(current); |
| else if (current == '\\') |
| state = InIdentifierPartUnicodeEscapeStart; |
| else |
| setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier); |
| break; |
| case InNum0: |
| if (current == 'x' || current == 'X') { |
| record8(current); |
| state = InHex; |
| } else if (current == '.') { |
| record8(current); |
| state = InDecimal; |
| } else if (current == 'e' || current == 'E') { |
| record8(current); |
| state = InExponentIndicator; |
| } else if (isOctalDigit(current)) { |
| record8(current); |
| state = InOctal; |
| } else if (isDecimalDigit(current)) { |
| record8(current); |
| state = InDecimal; |
| } else { |
| setDone(Number); |
| } |
| break; |
| case InHex: |
| if (isHexDigit(current)) { |
| record8(current); |
| } else { |
| setDone(Hex); |
| } |
| break; |
| case InOctal: |
| if (isOctalDigit(current)) { |
| record8(current); |
| } |
| else if (isDecimalDigit(current)) { |
| record8(current); |
| state = InDecimal; |
| } else |
| setDone(Octal); |
| break; |
| case InNum: |
| if (isDecimalDigit(current)) { |
| record8(current); |
| } else if (current == '.') { |
| record8(current); |
| state = InDecimal; |
| } else if (current == 'e' || current == 'E') { |
| record8(current); |
| state = InExponentIndicator; |
| } else |
| setDone(Number); |
| break; |
| case InDecimal: |
| if (isDecimalDigit(current)) { |
| record8(current); |
| } else if (current == 'e' || current == 'E') { |
| record8(current); |
| state = InExponentIndicator; |
| } else |
| setDone(Number); |
| break; |
| case InExponentIndicator: |
| if (current == '+' || current == '-') { |
| record8(current); |
| } else if (isDecimalDigit(current)) { |
| record8(current); |
| state = InExponent; |
| } else |
| setDone(Bad); |
| break; |
| case InExponent: |
| if (isDecimalDigit(current)) { |
| record8(current); |
| } else |
| setDone(Number); |
| break; |
| case InIdentifierStartUnicodeEscapeStart: |
| if (current == 'u') |
| state = InIdentifierStartUnicodeEscape; |
| else |
| setDone(Bad); |
| break; |
| case InIdentifierPartUnicodeEscapeStart: |
| if (current == 'u') |
| state = InIdentifierPartUnicodeEscape; |
| else |
| setDone(Bad); |
| break; |
| case InIdentifierStartUnicodeEscape: |
| if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { |
| setDone(Bad); |
| break; |
| } |
| token = convertUnicode(current, next1, next2, next3); |
| shift(3); |
| if (!isIdentStart(token)) { |
| setDone(Bad); |
| break; |
| } |
| record16(token); |
| state = InIdentifier; |
| break; |
| case InIdentifierPartUnicodeEscape: |
| if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { |
| setDone(Bad); |
| break; |
| } |
| token = convertUnicode(current, next1, next2, next3); |
| shift(3); |
| if (!isIdentPart(token)) { |
| setDone(Bad); |
| break; |
| } |
| record16(token); |
| state = InIdentifier; |
| break; |
| default: |
| ASSERT(!"Unhandled state in switch statement"); |
| } |
| |
| // move on to the next character |
| if (!done) |
| shift(1); |
| if (state != Start && state != InSingleLineComment) |
| atLineStart = false; |
| } |
| |
| // no identifiers allowed directly after numeric literal, e.g. "3in" is bad |
| if ((state == Number || state == Octal || state == Hex) && isIdentStart(current)) |
| state = Bad; |
| |
| // terminate string |
| m_buffer8.append('\0'); |
| |
| #ifdef KJS_DEBUG_LEX |
| fprintf(stderr, "line: %d ", lineNo()); |
| fprintf(stderr, "yytext (%x): ", m_buffer8[0]); |
| fprintf(stderr, "%s ", buffer8.data()); |
| #endif |
| |
| double dval = 0; |
| if (state == Number) { |
| dval = strtod(m_buffer8.data(), 0L); |
| } else if (state == Hex) { // scan hex numbers |
| const char* p = m_buffer8.data() + 2; |
| while (char c = *p++) { |
| dval *= 16; |
| dval += convertHex(c); |
| } |
| |
| if (dval >= mantissaOverflowLowerBound) |
| dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); |
| |
| state = Number; |
| } else if (state == Octal) { // scan octal number |
| const char* p = m_buffer8.data() + 1; |
| while (char c = *p++) { |
| dval *= 8; |
| dval += c - '0'; |
| } |
| |
| if (dval >= mantissaOverflowLowerBound) |
| dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); |
| |
| state = Number; |
| } |
| |
| #ifdef KJS_DEBUG_LEX |
| switch (state) { |
| case Eof: |
| printf("(EOF)\n"); |
| break; |
| case Other: |
| printf("(Other)\n"); |
| break; |
| case Identifier: |
| printf("(Identifier)/(Keyword)\n"); |
| break; |
| case String: |
| printf("(String)\n"); |
| break; |
| case Number: |
| printf("(Number)\n"); |
| break; |
| default: |
| printf("(unknown)"); |
| } |
| #endif |
| |
| if (state != Identifier) |
| eatNextIdentifier = false; |
| |
| restrKeyword = false; |
| delimited = false; |
| llocp->first_line = yylineno; // ??? |
| llocp->last_line = yylineno; |
| |
| switch (state) { |
| case Eof: |
| token = 0; |
| break; |
| case Other: |
| if (token == '}' || token == ';') |
| delimited = true; |
| break; |
| case Identifier: |
| // Apply anonymous-function hack below (eat the identifier). |
| if (eatNextIdentifier) { |
| eatNextIdentifier = false; |
| token = lex(lvalp, llocp); |
| break; |
| } |
| lvalp->ident = makeIdentifier(m_buffer16); |
| token = IDENT; |
| break; |
| case IdentifierOrKeyword: |
| lvalp->ident = makeIdentifier(m_buffer16); |
| if ((token = mainTable.value(*lvalp->ident)) < 0) { |
| // Lookup for keyword failed, means this is an identifier. |
| token = IDENT; |
| break; |
| } |
| // Hack for "f = function somename() { ... }"; too hard to get into the grammar. |
| eatNextIdentifier = token == FUNCTION && lastToken == '='; |
| if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW) |
| restrKeyword = true; |
| break; |
| case String: |
| lvalp->string = makeUString(m_buffer16); |
| token = STRING; |
| break; |
| case Number: |
| lvalp->doubleValue = dval; |
| token = NUMBER; |
| break; |
| case Bad: |
| #ifdef KJS_DEBUG_LEX |
| fprintf(stderr, "yylex: ERROR.\n"); |
| #endif |
| error = true; |
| return -1; |
| default: |
| ASSERT(!"unhandled numeration value in switch"); |
| error = true; |
| return -1; |
| } |
| lastToken = token; |
| return token; |
| } |
| |
| bool Lexer::isWhiteSpace() const |
| { |
| return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current); |
| } |
| |
| bool Lexer::isLineTerminator() |
| { |
| bool cr = (current == '\r'); |
| bool lf = (current == '\n'); |
| if (cr) |
| skipLF = true; |
| else if (lf) |
| skipCR = true; |
| return cr || lf || current == 0x2028 || current == 0x2029; |
| } |
| |
| bool Lexer::isIdentStart(int c) |
| { |
| return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)) |
| || c == '$' || c == '_'; |
| } |
| |
| bool Lexer::isIdentPart(int c) |
| { |
| return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other |
| | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) |
| || c == '$' || c == '_'; |
| } |
| |
// True when c is one of the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
    if (c < '0')
        return false;
    return c <= '9';
}
| |
| bool Lexer::isHexDigit(int c) |
| { |
| return (c >= '0' && c <= '9' || |
| c >= 'a' && c <= 'f' || |
| c >= 'A' && c <= 'F'); |
| } |
| |
| bool Lexer::isOctalDigit(int c) |
| { |
| return (c >= '0' && c <= '7'); |
| } |
| |
| int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4) |
| { |
| if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { |
| shift(4); |
| return URSHIFTEQUAL; |
| } else if (c1 == '=' && c2 == '=' && c3 == '=') { |
| shift(3); |
| return STREQ; |
| } else if (c1 == '!' && c2 == '=' && c3 == '=') { |
| shift(3); |
| return STRNEQ; |
| } else if (c1 == '>' && c2 == '>' && c3 == '>') { |
| shift(3); |
| return URSHIFT; |
| } else if (c1 == '<' && c2 == '<' && c3 == '=') { |
| shift(3); |
| return LSHIFTEQUAL; |
| } else if (c1 == '>' && c2 == '>' && c3 == '=') { |
| shift(3); |
| return RSHIFTEQUAL; |
| } else if (c1 == '<' && c2 == '=') { |
| shift(2); |
| return LE; |
| } else if (c1 == '>' && c2 == '=') { |
| shift(2); |
| return GE; |
| } else if (c1 == '!' && c2 == '=') { |
| shift(2); |
| return NE; |
| } else if (c1 == '+' && c2 == '+') { |
| shift(2); |
| if (terminator) |
| return AUTOPLUSPLUS; |
| else |
| return PLUSPLUS; |
| } else if (c1 == '-' && c2 == '-') { |
| shift(2); |
| if (terminator) |
| return AUTOMINUSMINUS; |
| else |
| return MINUSMINUS; |
| } else if (c1 == '=' && c2 == '=') { |
| shift(2); |
| return EQEQ; |
| } else if (c1 == '+' && c2 == '=') { |
| shift(2); |
| return PLUSEQUAL; |
| } else if (c1 == '-' && c2 == '=') { |
| shift(2); |
| return MINUSEQUAL; |
| } else if (c1 == '*' && c2 == '=') { |
| shift(2); |
| return MULTEQUAL; |
| } else if (c1 == '/' && c2 == '=') { |
| shift(2); |
| return DIVEQUAL; |
| } else if (c1 == '&' && c2 == '=') { |
| shift(2); |
| return ANDEQUAL; |
| } else if (c1 == '^' && c2 == '=') { |
| shift(2); |
| return XOREQUAL; |
| } else if (c1 == '%' && c2 == '=') { |
| shift(2); |
| return MODEQUAL; |
| } else if (c1 == '|' && c2 == '=') { |
| shift(2); |
| return OREQUAL; |
| } else if (c1 == '<' && c2 == '<') { |
| shift(2); |
| return LSHIFT; |
| } else if (c1 == '>' && c2 == '>') { |
| shift(2); |
| return RSHIFT; |
| } else if (c1 == '&' && c2 == '&') { |
| shift(2); |
| return AND; |
| } else if (c1 == '|' && c2 == '|') { |
| shift(2); |
| return OR; |
| } |
| |
| switch(c1) { |
| case '=': |
| case '>': |
| case '<': |
| case ',': |
| case '!': |
| case '~': |
| case '?': |
| case ':': |
| case '.': |
| case '+': |
| case '-': |
| case '*': |
| case '/': |
| case '&': |
| case '|': |
| case '^': |
| case '%': |
| case '(': |
| case ')': |
| case '[': |
| case ']': |
| case ';': |
| shift(1); |
| return static_cast<int>(c1); |
| case '{': |
| charPos = pos - 4; |
| shift(1); |
| return OPENBRACE; |
| case '}': |
| charPos = pos - 4; |
| shift(1); |
| return CLOSEBRACE; |
| default: |
| return -1; |
| } |
| } |
| |
| unsigned short Lexer::singleEscape(unsigned short c) |
| { |
| switch(c) { |
| case 'b': |
| return 0x08; |
| case 't': |
| return 0x09; |
| case 'n': |
| return 0x0A; |
| case 'v': |
| return 0x0B; |
| case 'f': |
| return 0x0C; |
| case 'r': |
| return 0x0D; |
| case '"': |
| return 0x22; |
| case '\'': |
| return 0x27; |
| case '\\': |
| return 0x5C; |
| default: |
| return c; |
| } |
| } |
| |
| unsigned short Lexer::convertOctal(int c1, int c2, int c3) |
| { |
| return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); |
| } |
| |
| unsigned char Lexer::convertHex(int c) |
| { |
| if (c >= '0' && c <= '9') |
| return static_cast<unsigned char>(c - '0'); |
| if (c >= 'a' && c <= 'f') |
| return static_cast<unsigned char>(c - 'a' + 10); |
| return static_cast<unsigned char>(c - 'A' + 10); |
| } |
| |
| unsigned char Lexer::convertHex(int c1, int c2) |
| { |
| return ((convertHex(c1) << 4) + convertHex(c2)); |
| } |
| |
| UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) |
| { |
| unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2); |
| unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4); |
| return (highByte << 8 | lowByte); |
| } |
| |
| void Lexer::record8(int c) |
| { |
| ASSERT(c >= 0); |
| ASSERT(c <= 0xff); |
| m_buffer8.append(static_cast<char>(c)); |
| } |
| |
| void Lexer::record16(int c) |
| { |
| ASSERT(c >= 0); |
| ASSERT(c <= USHRT_MAX); |
| record16(UChar(static_cast<unsigned short>(c))); |
| } |
| |
| void Lexer::record16(UChar c) |
| { |
| m_buffer16.append(c); |
| } |
| |
| bool Lexer::scanRegExp() |
| { |
| m_buffer16.clear(); |
| bool lastWasEscape = false; |
| bool inBrackets = false; |
| |
| while (1) { |
| if (isLineTerminator() || current == -1) |
| return false; |
| else if (current != '/' || lastWasEscape == true || inBrackets == true) |
| { |
| // keep track of '[' and ']' |
| if (!lastWasEscape) { |
| if ( current == '[' && !inBrackets ) |
| inBrackets = true; |
| if ( current == ']' && inBrackets ) |
| inBrackets = false; |
| } |
| record16(current); |
| lastWasEscape = |
| !lastWasEscape && (current == '\\'); |
| } else { // end of regexp |
| m_pattern = UString(m_buffer16); |
| m_buffer16.clear(); |
| shift(1); |
| break; |
| } |
| shift(1); |
| } |
| |
| while (isIdentPart(current)) { |
| record16(current); |
| shift(1); |
| } |
| m_flags = UString(m_buffer16); |
| |
| return true; |
| } |
| |
| void Lexer::clear() |
| { |
| deleteAllValues(m_strings); |
| Vector<UString*> newStrings; |
| newStrings.reserveCapacity(initialStringTableCapacity); |
| m_strings.swap(newStrings); |
| |
| deleteAllValues(m_identifiers); |
| Vector<KJS::Identifier*> newIdentifiers; |
| newIdentifiers.reserveCapacity(initialStringTableCapacity); |
| m_identifiers.swap(newIdentifiers); |
| |
| Vector<char> newBuffer8; |
| newBuffer8.reserveCapacity(initialReadBufferCapacity); |
| m_buffer8.swap(newBuffer8); |
| |
| Vector<UChar> newBuffer16; |
| newBuffer16.reserveCapacity(initialReadBufferCapacity); |
| m_buffer16.swap(newBuffer16); |
| |
| m_pattern = 0; |
| m_flags = 0; |
| } |
| |
| Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer) |
| { |
| KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size()); |
| m_identifiers.append(identifier); |
| return identifier; |
| } |
| |
| UString* Lexer::makeUString(const Vector<UChar>& buffer) |
| { |
| UString* string = new UString(buffer); |
| m_strings.append(string); |
| return string; |
| } |
| |
| } // namespace KJS |