blob: 1ce27af787e1d9d8c7806bf2052acbfdd03d7d84 [file] [log] [blame]
// -*- c-basic-offset: 2 -*-
/*
* This file is part of the KDE libraries
* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
* Copyright (C) 2007 Apple Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef Lexer_h
#define Lexer_h
#include "ustring.h"
#include <wtf/Vector.h>
namespace KJS {
class Identifier;
class RegExp;
class Lexer : Noncopyable {
public:
void setCode(int startingLineNumber, const UChar *c, unsigned int len);
int lex();
int lineNo() const { return yylineno; }
bool prevTerminator() const { return terminator; }
enum State { Start,
IdentifierOrKeyword,
Identifier,
InIdentifierOrKeyword,
InIdentifier,
InIdentifierStartUnicodeEscapeStart,
InIdentifierStartUnicodeEscape,
InIdentifierPartUnicodeEscapeStart,
InIdentifierPartUnicodeEscape,
InSingleLineComment,
InMultiLineComment,
InNum,
InNum0,
InHex,
InOctal,
InDecimal,
InExponentIndicator,
InExponent,
Hex,
Octal,
Number,
String,
Eof,
InString,
InEscapeSequence,
InHexEscape,
InUnicodeEscape,
Other,
Bad };
bool scanRegExp();
const UString& pattern() const { return m_pattern; }
const UString& flags() const { return m_flags; }
static unsigned char convertHex(int);
static unsigned char convertHex(int c1, int c2);
static UChar convertUnicode(int c1, int c2, int c3, int c4);
static bool isIdentStart(int);
static bool isIdentPart(int);
static bool isHexDigit(int);
bool sawError() const { return error; }
void clear();
private:
friend Lexer& lexer();
Lexer();
int yylineno;
bool done;
Vector<char> m_buffer8;
Vector<UChar> m_buffer16;
bool terminator;
bool restrKeyword;
// encountered delimiter like "'" and "}" on last run
bool delimited;
bool skipLF;
bool skipCR;
bool eatNextIdentifier;
int stackToken;
int lastToken;
State state;
void setDone(State);
unsigned int pos;
void shift(unsigned int p);
void nextLine();
int lookupKeyword(const char *);
bool isWhiteSpace() const;
bool isLineTerminator();
static bool isOctalDigit(int);
int matchPunctuator(int c1, int c2, int c3, int c4);
static unsigned short singleEscape(unsigned short);
static unsigned short convertOctal(int c1, int c2, int c3);
void record8(int);
void record16(int);
void record16(UChar);
KJS::Identifier* makeIdentifier(const Vector<UChar>& buffer);
UString* makeUString(const Vector<UChar>& buffer);
const UChar* code;
unsigned int length;
int yycolumn;
int atLineStart;
bool error;
// current and following unicode characters (int to allow for -1 for end-of-file marker)
int current, next1, next2, next3;
Vector<UString*> m_strings;
Vector<KJS::Identifier*> m_identifiers;
UString m_pattern;
UString m_flags;
};
Lexer& lexer(); // Returns the singletone JavaScript lexer.
} // namespace KJS
#endif // Lexer_h