| // -*- c-basic-offset: 2 -*- |
| /* |
| * This file is part of the KDE libraries |
| * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
| * Copyright (C) 2006 Apple Computer, Inc. |
| * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| * |
| */ |
| |
| #include "config.h" |
| #include "lexer.h" |
| |
| #include <ctype.h> |
| #include <string.h> |
| |
| #include "function.h" |
| #include "interpreter.h" |
| #include "nodes.h" |
| #include <wtf/unicode/Unicode.h> |
| |
| using namespace WTF; |
| using namespace Unicode; |
| |
| // we can't specify the namespace in yacc's C output, so do it here |
| using namespace KJS; |
| |
| #ifndef KDE_USE_FINAL |
| #include "grammar.h" |
| #endif |
| |
| #include "lookup.h" |
| #include "lexer.lut.h" |
| |
| extern YYLTYPE kjsyylloc; // global bison variable holding token info |
| |
| // a bridge for yacc from the C world to C++ |
| int kjsyylex() |
| { |
| return Lexer::curr()->lex(); |
| } |
| |
| namespace KJS { |
| |
| static Lexer* currLexer = 0; |
| |
| static bool isDecimalDigit(int); |
| |
| Lexer::Lexer() |
| : yylineno(1), |
| size8(128), size16(128), restrKeyword(false), |
| eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0), |
| code(0), length(0), |
| #ifndef KJS_PURE_ECMA |
| bol(true), |
| #endif |
| current(0), next1(0), next2(0), next3(0), |
| strings(0), numStrings(0), stringsCapacity(0), |
| identifiers(0), numIdentifiers(0), identifiersCapacity(0) |
| { |
| // allocate space for read buffers |
| buffer8 = new char[size8]; |
| buffer16 = new KJS::UChar[size16]; |
| currLexer = this; |
| } |
| |
| Lexer::~Lexer() |
| { |
| doneParsing(); |
| delete [] buffer8; |
| delete [] buffer16; |
| } |
| |
| Lexer *Lexer::curr() |
| { |
| if (!currLexer) { |
| // create singleton instance |
| currLexer = new Lexer(); |
| } |
| return currLexer; |
| } |
| |
#ifdef KJS_DEBUG_MEM
// Memory-debugging helper: destroys the current lexer and forgets it.
void Lexer::globalClear()
{
  delete currLexer;
  currLexer = 0;
}
#endif
| |
| void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len) |
| { |
| yylineno = 1 + startingLineNumber; |
| m_sourceURL = sourceURL; |
| restrKeyword = false; |
| delimited = false; |
| eatNextIdentifier = false; |
| stackToken = -1; |
| lastToken = -1; |
| pos = 0; |
| code = c; |
| length = len; |
| skipLF = false; |
| skipCR = false; |
| error = false; |
| #ifndef KJS_PURE_ECMA |
| bol = true; |
| #endif |
| |
| // read first characters |
| current = (length > 0) ? code[0].uc : -1; |
| next1 = (length > 1) ? code[1].uc : -1; |
| next2 = (length > 2) ? code[2].uc : -1; |
| next3 = (length > 3) ? code[3].uc : -1; |
| } |
| |
| void Lexer::shift(unsigned int p) |
| { |
| // Here would be a good place to strip Cf characters, but that has caused compatibility problems: |
| // <http://bugs.webkit.org/show_bug.cgi?id=10183>. |
| while (p--) { |
| pos++; |
| current = next1; |
| next1 = next2; |
| next2 = next3; |
| next3 = (pos + 3 < length) ? code[pos + 3].uc : -1; |
| } |
| } |
| |
| // called on each new line |
| void Lexer::nextLine() |
| { |
| yylineno++; |
| #ifndef KJS_PURE_ECMA |
| bol = true; |
| #endif |
| } |
| |
| void Lexer::setDone(State s) |
| { |
| state = s; |
| done = true; |
| } |
| |
| int Lexer::lex() |
| { |
| int token = 0; |
| state = Start; |
| unsigned short stringType = 0; // either single or double quotes |
| pos8 = pos16 = 0; |
| done = false; |
| terminator = false; |
| skipLF = false; |
| skipCR = false; |
| |
| // did we push a token on the stack previously ? |
| // (after an automatic semicolon insertion) |
| if (stackToken >= 0) { |
| setDone(Other); |
| token = stackToken; |
| stackToken = 0; |
| } |
| |
| while (!done) { |
| if (skipLF && current != '\n') // found \r but not \n afterwards |
| skipLF = false; |
| if (skipCR && current != '\r') // found \n but not \r afterwards |
| skipCR = false; |
| if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one |
| { |
| skipLF = false; |
| skipCR = false; |
| shift(1); |
| } |
| switch (state) { |
| case Start: |
| if (isWhiteSpace()) { |
| // do nothing |
| } else if (current == '/' && next1 == '/') { |
| shift(1); |
| state = InSingleLineComment; |
| } else if (current == '/' && next1 == '*') { |
| shift(1); |
| state = InMultiLineComment; |
| } else if (current == -1) { |
| if (!terminator && !delimited) { |
| // automatic semicolon insertion if program incomplete |
| token = ';'; |
| stackToken = 0; |
| setDone(Other); |
| } else |
| setDone(Eof); |
| } else if (isLineTerminator()) { |
| nextLine(); |
| terminator = true; |
| if (restrKeyword) { |
| token = ';'; |
| setDone(Other); |
| } |
| } else if (current == '"' || current == '\'') { |
| state = InString; |
| stringType = static_cast<unsigned short>(current); |
| } else if (isIdentStart(current)) { |
| record16(current); |
| state = InIdentifierOrKeyword; |
| } else if (current == '\\') { |
| state = InIdentifierUnicodeEscapeStart; |
| } else if (current == '0') { |
| record8(current); |
| state = InNum0; |
| } else if (isDecimalDigit(current)) { |
| record8(current); |
| state = InNum; |
| } else if (current == '.' && isDecimalDigit(next1)) { |
| record8(current); |
| state = InDecimal; |
| #ifndef KJS_PURE_ECMA |
| // <!-- marks the beginning of a line comment (for www usage) |
| } else if (current == '<' && next1 == '!' && |
| next2 == '-' && next3 == '-') { |
| shift(3); |
| state = InSingleLineComment; |
| // same for --> |
| } else if (bol && current == '-' && next1 == '-' && next2 == '>') { |
| shift(2); |
| state = InSingleLineComment; |
| #endif |
| } else { |
| token = matchPunctuator(current, next1, next2, next3); |
| if (token != -1) { |
| setDone(Other); |
| } else { |
| // cerr << "encountered unknown character" << endl; |
| setDone(Bad); |
| } |
| } |
| break; |
| case InString: |
| if (current == stringType) { |
| shift(1); |
| setDone(String); |
| } else if (isLineTerminator() || current == -1) { |
| setDone(Bad); |
| } else if (current == '\\') { |
| state = InEscapeSequence; |
| } else { |
| record16(current); |
| } |
| break; |
| // Escape Sequences inside of strings |
| case InEscapeSequence: |
| if (isOctalDigit(current)) { |
| if (current >= '0' && current <= '3' && |
| isOctalDigit(next1) && isOctalDigit(next2)) { |
| record16(convertOctal(current, next1, next2)); |
| shift(2); |
| state = InString; |
| } else if (isOctalDigit(current) && isOctalDigit(next1)) { |
| record16(convertOctal('0', current, next1)); |
| shift(1); |
| state = InString; |
| } else if (isOctalDigit(current)) { |
| record16(convertOctal('0', '0', current)); |
| state = InString; |
| } else { |
| setDone(Bad); |
| } |
| } else if (current == 'x') |
| state = InHexEscape; |
| else if (current == 'u') |
| state = InUnicodeEscape; |
| else if (isLineTerminator()) { |
| nextLine(); |
| state = InString; |
| } else { |
| record16(singleEscape(static_cast<unsigned short>(current))); |
| state = InString; |
| } |
| break; |
| case InHexEscape: |
| if (isHexDigit(current) && isHexDigit(next1)) { |
| state = InString; |
| record16(convertHex(current, next1)); |
| shift(1); |
| } else if (current == stringType) { |
| record16('x'); |
| shift(1); |
| setDone(String); |
| } else { |
| record16('x'); |
| record16(current); |
| state = InString; |
| } |
| break; |
| case InUnicodeEscape: |
| if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { |
| record16(convertUnicode(current, next1, next2, next3)); |
| shift(3); |
| state = InString; |
| } else if (current == stringType) { |
| record16('u'); |
| shift(1); |
| setDone(String); |
| } else { |
| setDone(Bad); |
| } |
| break; |
| case InSingleLineComment: |
| if (isLineTerminator()) { |
| nextLine(); |
| terminator = true; |
| if (restrKeyword) { |
| token = ';'; |
| setDone(Other); |
| } else |
| state = Start; |
| } else if (current == -1) { |
| setDone(Eof); |
| } |
| break; |
| case InMultiLineComment: |
| if (current == -1) { |
| setDone(Bad); |
| } else if (isLineTerminator()) { |
| nextLine(); |
| } else if (current == '*' && next1 == '/') { |
| state = Start; |
| shift(1); |
| } |
| break; |
| case InIdentifierOrKeyword: |
| case InIdentifier: |
| if (isIdentPart(current)) |
| record16(current); |
| else if (current == '\\') |
| state = InIdentifierUnicodeEscapeStart; |
| else |
| setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier); |
| break; |
| case InNum0: |
| if (current == 'x' || current == 'X') { |
| record8(current); |
| state = InHex; |
| } else if (current == '.') { |
| record8(current); |
| state = InDecimal; |
| } else if (current == 'e' || current == 'E') { |
| record8(current); |
| state = InExponentIndicator; |
| } else if (isOctalDigit(current)) { |
| record8(current); |
| state = InOctal; |
| } else if (isDecimalDigit(current)) { |
| record8(current); |
| state = InDecimal; |
| } else { |
| setDone(Number); |
| } |
| break; |
| case InHex: |
| if (isHexDigit(current)) { |
| record8(current); |
| } else { |
| setDone(Hex); |
| } |
| break; |
| case InOctal: |
| if (isOctalDigit(current)) { |
| record8(current); |
| } |
| else if (isDecimalDigit(current)) { |
| record8(current); |
| state = InDecimal; |
| } else |
| setDone(Octal); |
| break; |
| case InNum: |
| if (isDecimalDigit(current)) { |
| record8(current); |
| } else if (current == '.') { |
| record8(current); |
| state = InDecimal; |
| } else if (current == 'e' || current == 'E') { |
| record8(current); |
| state = InExponentIndicator; |
| } else |
| setDone(Number); |
| break; |
| case InDecimal: |
| if (isDecimalDigit(current)) { |
| record8(current); |
| } else if (current == 'e' || current == 'E') { |
| record8(current); |
| state = InExponentIndicator; |
| } else |
| setDone(Number); |
| break; |
| case InExponentIndicator: |
| if (current == '+' || current == '-') { |
| record8(current); |
| } else if (isDecimalDigit(current)) { |
| record8(current); |
| state = InExponent; |
| } else |
| setDone(Bad); |
| break; |
| case InExponent: |
| if (isDecimalDigit(current)) { |
| record8(current); |
| } else |
| setDone(Number); |
| break; |
| case InIdentifierUnicodeEscapeStart: |
| if (current == 'u') |
| state = InIdentifierUnicodeEscape; |
| else |
| setDone(Bad); |
| break; |
| case InIdentifierUnicodeEscape: |
| if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { |
| record16(convertUnicode(current, next1, next2, next3)); |
| shift(3); |
| state = InIdentifier; |
| } else { |
| setDone(Bad); |
| } |
| break; |
| default: |
| assert(!"Unhandled state in switch statement"); |
| } |
| |
| // move on to the next character |
| if (!done) |
| shift(1); |
| #ifndef KJS_PURE_ECMA |
| if (state != Start && state != InSingleLineComment) |
| bol = false; |
| #endif |
| } |
| |
| // no identifiers allowed directly after numeric literal, e.g. "3in" is bad |
| if ((state == Number || state == Octal || state == Hex) && isIdentStart(current)) |
| state = Bad; |
| |
| // terminate string |
| buffer8[pos8] = '\0'; |
| |
| #ifdef KJS_DEBUG_LEX |
| fprintf(stderr, "line: %d ", lineNo()); |
| fprintf(stderr, "yytext (%x): ", buffer8[0]); |
| fprintf(stderr, "%s ", buffer8); |
| #endif |
| |
| double dval = 0; |
| if (state == Number) { |
| dval = strtod(buffer8, 0L); |
| } else if (state == Hex) { // scan hex numbers |
| const char *p = buffer8 + 2; |
| while (char c = *p++) { |
| dval *= 16; |
| dval += convertHex(c); |
| } |
| |
| if (dval >= mantissaOverflowLowerBound) |
| dval = parseIntOverflow(buffer8 + 2, p - (buffer8 + 3), 16); |
| |
| state = Number; |
| } else if (state == Octal) { // scan octal number |
| const char *p = buffer8 + 1; |
| while (char c = *p++) { |
| dval *= 8; |
| dval += c - '0'; |
| } |
| |
| if (dval >= mantissaOverflowLowerBound) |
| dval = parseIntOverflow(buffer8 + 1, p - (buffer8 + 2), 8); |
| |
| state = Number; |
| } |
| |
| #ifdef KJS_DEBUG_LEX |
| switch (state) { |
| case Eof: |
| printf("(EOF)\n"); |
| break; |
| case Other: |
| printf("(Other)\n"); |
| break; |
| case Identifier: |
| printf("(Identifier)/(Keyword)\n"); |
| break; |
| case String: |
| printf("(String)\n"); |
| break; |
| case Number: |
| printf("(Number)\n"); |
| break; |
| default: |
| printf("(unknown)"); |
| } |
| #endif |
| |
| if (state != Identifier && eatNextIdentifier) |
| eatNextIdentifier = false; |
| |
| restrKeyword = false; |
| delimited = false; |
| kjsyylloc.first_line = yylineno; // ??? |
| kjsyylloc.last_line = yylineno; |
| |
| switch (state) { |
| case Eof: |
| token = 0; |
| break; |
| case Other: |
| if(token == '}' || token == ';') { |
| delimited = true; |
| } |
| break; |
| case IdentifierOrKeyword: |
| if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) { |
| case Identifier: |
| // Lookup for keyword failed, means this is an identifier |
| // Apply anonymous-function hack below (eat the identifier) |
| if (eatNextIdentifier) { |
| eatNextIdentifier = false; |
| token = lex(); |
| break; |
| } |
| kjsyylval.ident = makeIdentifier(buffer16, pos16); |
| token = IDENT; |
| break; |
| } |
| |
| eatNextIdentifier = false; |
| // Hack for "f = function somename() { ... }", too hard to get into the grammar |
| if (token == FUNCTION && lastToken == '=' ) |
| eatNextIdentifier = true; |
| |
| if (token == CONTINUE || token == BREAK || |
| token == RETURN || token == THROW) |
| restrKeyword = true; |
| break; |
| case String: |
| kjsyylval.ustr = makeUString(buffer16, pos16); |
| token = STRING; |
| break; |
| case Number: |
| kjsyylval.dval = dval; |
| token = NUMBER; |
| break; |
| case Bad: |
| #ifdef KJS_DEBUG_LEX |
| fprintf(stderr, "yylex: ERROR.\n"); |
| #endif |
| error = true; |
| return -1; |
| default: |
| assert(!"unhandled numeration value in switch"); |
| error = true; |
| return -1; |
| } |
| lastToken = token; |
| return token; |
| } |
| |
| bool Lexer::isWhiteSpace() const |
| { |
| return current == '\t' || current == 0x0b || current == 0x0c || isSeparatorSpace(current); |
| } |
| |
| bool Lexer::isLineTerminator() |
| { |
| bool cr = (current == '\r'); |
| bool lf = (current == '\n'); |
| if (cr) |
| skipLF = true; |
| else if (lf) |
| skipCR = true; |
| return cr || lf || current == 0x2028 || current == 0x2029; |
| } |
| |
| bool Lexer::isIdentStart(int c) |
| { |
| return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)) |
| || c == '$' || c == '_'; |
| } |
| |
| bool Lexer::isIdentPart(int c) |
| { |
| return (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other |
| | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)) |
| || c == '$' || c == '_'; |
| } |
| |
// True when `c` is one of the ASCII digits '0'..'9'.
static bool isDecimalDigit(int c)
{
  return !(c < '0' || c > '9');
}
| |
| bool Lexer::isHexDigit(int c) |
| { |
| return (c >= '0' && c <= '9' || |
| c >= 'a' && c <= 'f' || |
| c >= 'A' && c <= 'F'); |
| } |
| |
| bool Lexer::isOctalDigit(int c) |
| { |
| return (c >= '0' && c <= '7'); |
| } |
| |
| int Lexer::matchPunctuator(int c1, int c2, int c3, int c4) |
| { |
| if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { |
| shift(4); |
| return URSHIFTEQUAL; |
| } else if (c1 == '=' && c2 == '=' && c3 == '=') { |
| shift(3); |
| return STREQ; |
| } else if (c1 == '!' && c2 == '=' && c3 == '=') { |
| shift(3); |
| return STRNEQ; |
| } else if (c1 == '>' && c2 == '>' && c3 == '>') { |
| shift(3); |
| return URSHIFT; |
| } else if (c1 == '<' && c2 == '<' && c3 == '=') { |
| shift(3); |
| return LSHIFTEQUAL; |
| } else if (c1 == '>' && c2 == '>' && c3 == '=') { |
| shift(3); |
| return RSHIFTEQUAL; |
| } else if (c1 == '<' && c2 == '=') { |
| shift(2); |
| return LE; |
| } else if (c1 == '>' && c2 == '=') { |
| shift(2); |
| return GE; |
| } else if (c1 == '!' && c2 == '=') { |
| shift(2); |
| return NE; |
| } else if (c1 == '+' && c2 == '+') { |
| shift(2); |
| if (terminator) |
| return AUTOPLUSPLUS; |
| else |
| return PLUSPLUS; |
| } else if (c1 == '-' && c2 == '-') { |
| shift(2); |
| if (terminator) |
| return AUTOMINUSMINUS; |
| else |
| return MINUSMINUS; |
| } else if (c1 == '=' && c2 == '=') { |
| shift(2); |
| return EQEQ; |
| } else if (c1 == '+' && c2 == '=') { |
| shift(2); |
| return PLUSEQUAL; |
| } else if (c1 == '-' && c2 == '=') { |
| shift(2); |
| return MINUSEQUAL; |
| } else if (c1 == '*' && c2 == '=') { |
| shift(2); |
| return MULTEQUAL; |
| } else if (c1 == '/' && c2 == '=') { |
| shift(2); |
| return DIVEQUAL; |
| } else if (c1 == '&' && c2 == '=') { |
| shift(2); |
| return ANDEQUAL; |
| } else if (c1 == '^' && c2 == '=') { |
| shift(2); |
| return XOREQUAL; |
| } else if (c1 == '%' && c2 == '=') { |
| shift(2); |
| return MODEQUAL; |
| } else if (c1 == '|' && c2 == '=') { |
| shift(2); |
| return OREQUAL; |
| } else if (c1 == '<' && c2 == '<') { |
| shift(2); |
| return LSHIFT; |
| } else if (c1 == '>' && c2 == '>') { |
| shift(2); |
| return RSHIFT; |
| } else if (c1 == '&' && c2 == '&') { |
| shift(2); |
| return AND; |
| } else if (c1 == '|' && c2 == '|') { |
| shift(2); |
| return OR; |
| } |
| |
| switch(c1) { |
| case '=': |
| case '>': |
| case '<': |
| case ',': |
| case '!': |
| case '~': |
| case '?': |
| case ':': |
| case '.': |
| case '+': |
| case '-': |
| case '*': |
| case '/': |
| case '&': |
| case '|': |
| case '^': |
| case '%': |
| case '(': |
| case ')': |
| case '{': |
| case '}': |
| case '[': |
| case ']': |
| case ';': |
| shift(1); |
| return static_cast<int>(c1); |
| default: |
| return -1; |
| } |
| } |
| |
| unsigned short Lexer::singleEscape(unsigned short c) |
| { |
| switch(c) { |
| case 'b': |
| return 0x08; |
| case 't': |
| return 0x09; |
| case 'n': |
| return 0x0A; |
| case 'v': |
| return 0x0B; |
| case 'f': |
| return 0x0C; |
| case 'r': |
| return 0x0D; |
| case '"': |
| return 0x22; |
| case '\'': |
| return 0x27; |
| case '\\': |
| return 0x5C; |
| default: |
| return c; |
| } |
| } |
| |
| unsigned short Lexer::convertOctal(int c1, int c2, int c3) |
| { |
| return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); |
| } |
| |
| unsigned char Lexer::convertHex(int c) |
| { |
| if (c >= '0' && c <= '9') |
| return static_cast<unsigned char>(c - '0'); |
| if (c >= 'a' && c <= 'f') |
| return static_cast<unsigned char>(c - 'a' + 10); |
| return static_cast<unsigned char>(c - 'A' + 10); |
| } |
| |
| unsigned char Lexer::convertHex(int c1, int c2) |
| { |
| return ((convertHex(c1) << 4) + convertHex(c2)); |
| } |
| |
| KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) |
| { |
| return KJS::UChar((convertHex(c1) << 4) + convertHex(c2), |
| (convertHex(c3) << 4) + convertHex(c4)); |
| } |
| |
| void Lexer::record8(int c) |
| { |
| ASSERT(c >= 0); |
| ASSERT(c <= 0xff); |
| |
| // enlarge buffer if full |
| if (pos8 >= size8 - 1) { |
| char *tmp = new char[2 * size8]; |
| memcpy(tmp, buffer8, size8 * sizeof(char)); |
| delete [] buffer8; |
| buffer8 = tmp; |
| size8 *= 2; |
| } |
| |
| buffer8[pos8++] = (char) c; |
| } |
| |
| void Lexer::record16(int c) |
| { |
| ASSERT(c >= 0); |
| ASSERT(c <= USHRT_MAX); |
| record16(UChar(static_cast<unsigned short>(c))); |
| } |
| |
| void Lexer::record16(KJS::UChar c) |
| { |
| // enlarge buffer if full |
| if (pos16 >= size16 - 1) { |
| KJS::UChar *tmp = new KJS::UChar[2 * size16]; |
| memcpy(tmp, buffer16, size16 * sizeof(KJS::UChar)); |
| delete [] buffer16; |
| buffer16 = tmp; |
| size16 *= 2; |
| } |
| |
| buffer16[pos16++] = c; |
| } |
| |
| bool Lexer::scanRegExp() |
| { |
| pos16 = 0; |
| bool lastWasEscape = false; |
| bool inBrackets = false; |
| |
| while (1) { |
| if (isLineTerminator() || current == -1) |
| return false; |
| else if (current != '/' || lastWasEscape == true || inBrackets == true) |
| { |
| // keep track of '[' and ']' |
| if (!lastWasEscape) { |
| if ( current == '[' && !inBrackets ) |
| inBrackets = true; |
| if ( current == ']' && inBrackets ) |
| inBrackets = false; |
| } |
| record16(current); |
| lastWasEscape = |
| !lastWasEscape && (current == '\\'); |
| } |
| else { // end of regexp |
| pattern = UString(buffer16, pos16); |
| pos16 = 0; |
| shift(1); |
| break; |
| } |
| shift(1); |
| } |
| |
| while (isIdentPart(current)) { |
| record16(current); |
| shift(1); |
| } |
| flags = UString(buffer16, pos16); |
| |
| return true; |
| } |
| |
| |
| void Lexer::doneParsing() |
| { |
| for (unsigned i = 0; i < numIdentifiers; i++) { |
| delete identifiers[i]; |
| } |
| fastFree(identifiers); |
| identifiers = 0; |
| numIdentifiers = 0; |
| identifiersCapacity = 0; |
| |
| for (unsigned i = 0; i < numStrings; i++) { |
| delete strings[i]; |
| } |
| fastFree(strings); |
| strings = 0; |
| numStrings = 0; |
| stringsCapacity = 0; |
| } |
| |
| const int initialCapacity = 64; |
| const int growthFactor = 2; |
| |
| // FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf? |
| Identifier *Lexer::makeIdentifier(KJS::UChar*, unsigned int) |
| { |
| if (numIdentifiers == identifiersCapacity) { |
| identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor; |
| identifiers = (KJS::Identifier **)fastRealloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity); |
| } |
| |
| KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16); |
| identifiers[numIdentifiers++] = identifier; |
| return identifier; |
| } |
| |
| // FIXME: this completely ignores its parameters, instead using buffer16 and pos16 - wtf? |
| UString *Lexer::makeUString(KJS::UChar*, unsigned int) |
| { |
| if (numStrings == stringsCapacity) { |
| stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor; |
| strings = (UString **)fastRealloc(strings, sizeof(UString *) * stringsCapacity); |
| } |
| |
| UString *string = new UString(buffer16, pos16); |
| strings[numStrings++] = string; |
| return string; |
| } |
| |
| } |