2010-01-08 Pavel Feldman <pfeldman@chromium.org>
Reviewed by Timothy Hatcher.
Web Inspector: Regex-based syntax highlighting is slow.
https://bugs.webkit.org/show_bug.cgi?id=33330
* WebCore.gypi:
* WebCore.vcproj/WebCore.vcproj:
* inspector/front-end/JavaScriptHighlighterScheme.js: Removed.
* inspector/front-end/JavaScriptTokenizer.js: Added.
(WebInspector.JavaScriptTokenizer):
(WebInspector.JavaScriptTokenizer.prototype.set line):
(WebInspector.JavaScriptTokenizer.prototype.getCondition):
(WebInspector.JavaScriptTokenizer.prototype.setCondition):
(WebInspector.JavaScriptTokenizer.prototype._charAt):
(WebInspector.JavaScriptTokenizer.prototype.nextToken):
* inspector/front-end/JavaScriptTokenizer.re2js: Added.
* inspector/front-end/TextEditorHighlighter.js:
(WebInspector.TextEditorHighlighter):
(WebInspector.TextEditorHighlighter.prototype.highlight):
(WebInspector.TextEditorHighlighter.prototype._lex):
* inspector/front-end/WebKit.qrc:
* inspector/front-end/inspector.html:
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@52985 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/inspector/front-end/JavaScriptTokenizer.re2js b/WebCore/inspector/front-end/JavaScriptTokenizer.re2js
new file mode 100644
index 0000000..c4e486a
--- /dev/null
+++ b/WebCore/inspector/front-end/JavaScriptTokenizer.re2js
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2009 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Generate js file as follows:
+//
+// re2c -isc WebCore/inspector/front-end/JavaScriptTokenizer.re2js \
+// | sed 's|^yy\([^:]*\)*\:|case \1:|' \
+// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
+// | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
+// | sed 's|[*]cursor|this._charAt(cursor)|' \
+// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
+// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
+// | sed 's|unsigned\ int|var|' \
+// | sed 's|var\ yych|case 1: var yych|'
+
+WebInspector.JavaScriptTokenizer = function()
+{
+ this._tokens = [];
+ this._keywords = [
+ "null", "true", "false", "break", "case", "catch", "const", "default", "finally", "for",
+ "instanceof", "new", "var", "continue", "function", "return", "void", "delete", "if",
+ "this", "do", "while", "else", "in", "switch", "throw", "try", "typeof", "debugger",
+ "class", "enum", "export", "extends", "import", "super", "get", "set"
+ ].keySet();
+
+ this._conditions = {
+ DIV: 0,
+ NODIV: 1,
+ COMMENT: 2,
+ DSTRING: 3,
+ SSTRING: 4,
+ REGEX: 5
+ };
+
+ this.case_DIV = 1000;
+ this.case_NODIV = 1001;
+ this.case_COMMENT = 1002;
+ this.case_DSTRING = 1003;
+ this.case_SSTRING = 1004;
+ this.case_REGEX = 1005;
+
+ this.initialCondition = this._conditions.NODIV;
+ this.condition = this.initialCondition;
+}
+
+WebInspector.JavaScriptTokenizer.prototype = {
+
+ set line(line) {
+ this._line = line;
+ },
+
+ getCondition: function()
+ {
+ return this.condition;
+ },
+
+ setCondition: function(condition)
+ {
+ this.condition = condition;
+ },
+
+ _charAt: function(cursor)
+ {
+ return cursor < this._line.length ? this._line.charAt(cursor) : "\n";
+ },
+
+ nextToken: function(cursor)
+ {
+ var cursorOnEnter = cursor;
+ var gotoCase = 1;
+ while (1) {
+ switch (gotoCase)
+ // Following comment is replaced with generated state machine.
+ /*!re2c
+ re2c:define:YYCTYPE = "var";
+ re2c:define:YYCURSOR = cursor;
+ re2c:define:YYGETCONDITION = "this.getCondition";
+ re2c:define:YYSETCONDITION = "this.setCondition";
+ re2c:condprefix = "case this.case_";
+ re2c:condenumprefix = "this._conditions.";
+ re2c:yyfill:enable = 0;
+ re2c:labelprefix = "case ";
+ re2c:indent:top = 2;
+ re2c:indent:string = " ";
+
+ LineComment = "//" [^\r\n]*;
+ CommentContent = ([^*\r\n] | ("*"+[^/*]))*;
+ Comment = "/*" CommentContent "*"+ "/";
+ CommentStart = "/*" CommentContent [\r\n];
+ CommentEnd = CommentContent "*"+ "/";
+
+ DecimalDigit = [0-9];
+ NonZeroDigit = [1-9];
+ OctalDigit = [0-7];
+ HexDigit = [0-9a-fA-F];
+ SignedInteger = ("+"|"-")? DecimalDigit+;
+ ExponentPart = ("e" | "E") SignedInteger;
+ DecimalIntegerLiteral = "0" | NonZeroDigit DecimalDigit*;
+ DecimalLiteral = DecimalIntegerLiteral "." DecimalDigit* ExponentPart? | "." DecimalDigit+ ExponentPart? | DecimalIntegerLiteral ExponentPart?;
+ HexIntegerLiteral = "0" ("x"|"X") HexDigit+;
+ OctalIntegerLiteral = "0" OctalDigit+;
+ NumericLiteral = DecimalLiteral | HexIntegerLiteral | OctalIntegerLiteral;
+
+ Punctuation = [\!\%\&\(\*\+\,\-\.\:\;\<\=\>\?\[\]\^\{\|\}\~] | "!=" | "!==" | "%=" | "&&" | "&=" | "*=" | "++" | "+=" | "--" | "-=" | "<<" | "<<=" | "<=" | "==" | "===" | ">=" | ">>" | ">>=" | ">>>" | ">>>=" | "^=" | "|=" | "||";
+ Division = "/" | "/=";
+ RightParen = ")";
+
+ Letter = [a-zA-Z\x80-\xFF];
+ UnicodeEscapeSequence = "\\u" HexDigit HexDigit HexDigit HexDigit;
+
+ IdentifierStart = Letter | "_" | "$" | UnicodeEscapeSequence;
+ IdentifierPart = IdentifierStart | DecimalDigit;
+ Identifier = IdentifierStart IdentifierPart *;
+
+ DoubleStringContent = ([^\r\n\"\\] | UnicodeEscapeSequence | "\\" ['"\\bfnrtv])*;
+ SingleStringContent = ([^\r\n\'\\] | UnicodeEscapeSequence | "\\" ['"\\bfnrtv])*;
+ StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
+ DoubleStringStart = "\"" DoubleStringContent "\\" [\r\n];
+ DoubleStringEnd = DoubleStringContent "\"";
+ SingleStringStart = "\'" SingleStringContent "\\" [\r\n];
+ SingleStringEnd = SingleStringContent "\'";
+
+ BackslashSequence = "\\" [^\r\n];
+ RegexSet = "[" ([^\r\n*\\/] | BackslashSequence)* "]";
+ RegexFirstChar = [^\r\n*\\/\[\]] | BackslashSequence | RegexSet;
+ RegexChar = [^\r\n\\/\[\]] | BackslashSequence | RegexSet;
+ RegexContent = RegexChar*;
+ Regex = "/" RegexFirstChar RegexContent "/" [igm]*;
+ RegexStart = "/" RegexFirstChar RegexContent "\\";
+ RegexEnd = RegexContent "/" [igm]*;
+
+ <DIV,NODIV> LineComment { this.tokenType = "comment"; return cursor; }
+ <DIV,NODIV> Comment { this.tokenType = "comment"; return cursor; }
+ <DIV,NODIV> CommentStart => COMMENT { this.tokenType = "comment"; return cursor; }
+ <COMMENT> CommentContent => COMMENT { this.tokenType = "comment"; return cursor; }
+ <COMMENT> CommentEnd => NODIV { this.tokenType = "comment"; return cursor; }
+
+ <DIV,NODIV> StringLiteral { this.tokenType = "string"; return cursor; }
+ <DIV,NODIV> DoubleStringStart => DSTRING { this.tokenType = "string"; return cursor; }
+ <DSTRING> DoubleStringContent => DSTRING { this.tokenType = "string"; return cursor; }
+ <DSTRING> DoubleStringEnd => NODIV { this.tokenType = "string"; return cursor; }
+ <DIV,NODIV> SingleStringStart => SSTRING { this.tokenType = "string"; return cursor; }
+ <SSTRING> SingleStringContent => SSTRING { this.tokenType = "string"; return cursor; }
+ <SSTRING> SingleStringEnd => NODIV { this.tokenType = "string"; return cursor; }
+
+ <NODIV> Regex { this.tokenType = "regex"; return cursor; }
+ <NODIV> RegexStart => REGEX { this.tokenType = "regex"; return cursor; }
+ <REGEX> RegexContent => REGEX { this.tokenType = "regex"; return cursor; }
+ <REGEX> RegexEnd => NODIV { this.tokenType = "regex"; return cursor; }
+
+ <DIV,NODIV> NumericLiteral => DIV { this.tokenType = "number"; return cursor; }
+ <DIV,NODIV> Identifier => DIV
+ {
+ var token = this._line.substring(cursorOnEnter, cursor);
+ if (token in this._keywords)
+ this.tokenType = "keyword";
+ else
+ this.tokenType = null;
+ return cursor;
+ }
+ <DIV,NODIV> RightParen => DIV { this.tokenType = null; return cursor; }
+ <DIV,NODIV> Punctuation => NODIV { this.tokenType = null; return cursor; }
+ <DIV> Division => NODIV { this.tokenType = null; return cursor; }
+ <*> [^] { this.tokenType = null; return cursor; }
+ */
+ }
+ }
+}