| /* |
| * Copyright (C) 2009 Google Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following disclaimer |
| * in the documentation and/or other materials provided with the |
| * distribution. |
| * * Neither the name of Google Inc. nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| // Generate js file as follows: |
| // |
| // re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \ |
| // | sed 's|^yy\([^:]*\)*\:|case \1:|' \ |
| // | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \ |
| // | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \ |
| // | sed 's|[*]cursor|this._charAt(cursor)|' \ |
| // | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \ |
| // | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \ |
| // | sed 's|unsigned\ int|var|' \ |
| // | sed 's|var\ yych|case 1: var yych|' |
| |
/**
 * Tokenizer for HTML source text. Initializes the base SourceTokenizer state,
 * then publishes the lexer/parser condition tables used by the re2c-generated
 * state machine in nextToken() and installs the initial condition.
 *
 * @constructor
 */
WebInspector.SourceHTMLTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // Lexer conditions. The numbering must follow the order used by the generated code.
    var lexConditions = {
        INITIAL: 0,
        COMMENT: 1,
        DOCTYPE: 2,
        TAG: 3,
        DSTRING: 4,
        SSTRING: 5
    };
    this._lexConditions = lexConditions;

    // Switch labels of the per-condition entry points; the grammar refers to
    // them through its "case this.case_" condition prefix.
    this.case_INITIAL = 1000;
    this.case_COMMENT = 1001;
    this.case_DOCTYPE = 1002;
    this.case_TAG = 1003;
    this.case_DSTRING = 1004;
    this.case_SSTRING = 1005;

    // Parser conditions are bit flags that get combined with |, & and ^.
    var parseConditions = {
        INITIAL: 0,
        ATTRIBUTE: 1 << 0,
        ATTRIBUTE_VALUE: 1 << 1,
        LINKIFY: 1 << 2,
        A_NODE: 1 << 3,
        SCRIPT: 1 << 4
    };
    this._parseConditions = parseConditions;

    // Start in the INITIAL lexer and parser conditions.
    this.initialCondition = {
        lexCondition: lexConditions.INITIAL,
        parseCondition: parseConditions.INITIAL
    };
    this.condition = this.initialCondition;
};
| |
| WebInspector.SourceHTMLTokenizer.prototype = { |
| set line(line) { |
| if (this._internalJavaScriptTokenizer) { |
| var match = /<\/script/i.exec(line); |
| if (match) { |
| this._internalJavaScriptTokenizer.line = line.substring(0, match.index); |
| } else |
| this._internalJavaScriptTokenizer.line = line; |
| } |
| this._line = line; |
| }, |
| |
| _isExpectingAttribute: function() |
| { |
| return this._condition.parseCondition & this._parseConditions.ATTRIBUTE; |
| }, |
| |
| _isExpectingAttributeValue: function() |
| { |
| return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE; |
| }, |
| |
| _setExpectingAttribute: function() |
| { |
| if (this._isExpectingAttributeValue()) |
| this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE; |
| this._condition.parseCondition |= this._parseConditions.ATTRIBUTE; |
| }, |
| |
| _setExpectingAttributeValue: function() |
| { |
| if (this._isExpectingAttribute()) |
| this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE; |
| this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE; |
| }, |
| |
| _stringToken: function(cursor, stringEnds) |
| { |
| if (!this._isExpectingAttributeValue()) { |
| this.tokenType = null; |
| return cursor; |
| } |
| this.tokenType = this._attrValueTokenType(); |
| if (stringEnds) |
| this._setExpectingAttribute(); |
| return cursor; |
| }, |
| |
| _attrValueTokenType: function() |
| { |
| if (this._condition.parseCondition & this._parseConditions.LINKIFY) { |
| if (this._condition.parseCondition & this._parseConditions.A_NODE) |
| return "html-external-link"; |
| return "html-resource-link"; |
| } |
| return "html-attribute-value"; |
| }, |
| |
| nextToken: function(cursor) |
| { |
| if (this._internalJavaScriptTokenizer) { |
| // Re-set line to force </script> detection first. |
| this.line = this._line; |
| if (cursor !== this._internalJavaScriptTokenizer._line.length) { |
| // Tokenizer is stateless, so restore its condition before tokenizing and save it after. |
| this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition; |
| var result = this._internalJavaScriptTokenizer.nextToken(cursor); |
| this.tokenType = this._internalJavaScriptTokenizer.tokenType; |
| this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition; |
| return result; |
| } else if (cursor !== this._line.length) |
| delete this._internalJavaScriptTokenizer; |
| } |
| |
| var cursorOnEnter = cursor; |
| var gotoCase = 1; |
| while (1) { |
| switch (gotoCase) |
| // Following comment is replaced with generated state machine. |
| /*!re2c |
| re2c:define:YYCTYPE = "var"; |
| re2c:define:YYCURSOR = cursor; |
| re2c:define:YYGETCONDITION = "this.getLexCondition"; |
| re2c:define:YYSETCONDITION = "this.setLexCondition"; |
| re2c:condprefix = "case this.case_"; |
| re2c:condenumprefix = "this._lexConditions."; |
| re2c:yyfill:enable = 0; |
| re2c:labelprefix = "case "; |
| re2c:indent:top = 2; |
| re2c:indent:string = " "; |
| |
| CommentContent = ([^-\r\n] | ("--" [^>]))*; |
| Comment = "<!--" CommentContent "-->"; |
| CommentStart = "<!--" CommentContent [\r\n]; |
| CommentEnd = CommentContent "-->"; |
| |
| DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee]; |
| DocTypeContent = [^\r\n>]*; |
| |
| ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; |
| ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; |
| |
| LT = "<" | "</"; |
| GT = ">"; |
| EqualSign = "="; |
| |
| DoubleStringContent = [^\r\n\"]*; |
| SingleStringContent = [^\r\n\']*; |
| StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'"; |
| DoubleStringStart = "\"" DoubleStringContent [\r\n]; |
| DoubleStringEnd = DoubleStringContent "\""; |
| SingleStringStart = "'" SingleStringContent [\r\n]; |
| SingleStringEnd = SingleStringContent "'"; |
| |
| Identifier = [^ \r\n"'<>\[\]=]+; |
| |
| <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; } |
| <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; } |
| <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; } |
| <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; } |
| |
| <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } |
| <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } |
| <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; } |
| |
| <INITIAL> ScriptStart => TAG |
| { |
| if (this._condition.parseCondition & this._parseConditions.SCRIPT) { |
| // Do not tokenize script tag contents, keep lexer state although processing "<". |
| this.setLexCondition(this._lexConditions.INITIAL); |
| this.tokenType = null; |
| return cursor; |
| } |
| this.tokenType = "html-tag"; |
| this._condition.parseCondition = this._parseConditions.SCRIPT; |
| this._setExpectingAttribute(); |
| return cursor; |
| } |
| |
| <INITIAL> ScriptEnd => TAG |
| { |
| this.tokenType = "html-tag"; |
| this._condition.parseCondition = this._parseConditions.INITIAL; |
| return cursor; |
| } |
| |
| <INITIAL> LT => TAG |
| { |
| if (this._condition.parseCondition & this._parseConditions.SCRIPT) { |
| // Do not tokenize script tag contents, keep lexer state although processing "<". |
| this.setLexCondition(this._lexConditions.INITIAL); |
| this.tokenType = null; |
| return cursor; |
| } |
| |
| this._condition.parseCondition = this._parseConditions.INITIAL; |
| this.tokenType = "html-tag"; |
| return cursor; |
| } |
| |
| <TAG> GT => INITIAL |
| { |
| this.tokenType = "html-tag"; |
| if (this._condition.parseCondition & this._parseConditions.SCRIPT) { |
| if (!this._internalJavaScriptTokenizer) { |
| this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript"); |
| this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition; |
| } |
| // Do not tokenize script tag contents. |
| return cursor; |
| } |
| |
| this._condition.parseCondition = this._parseConditions.INITIAL; |
| return cursor; |
| } |
| |
| <TAG> StringLiteral { return this._stringToken(cursor, true); } |
| <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); } |
| <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); } |
| <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); } |
| <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); } |
| <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); } |
| <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); } |
| |
| <TAG> EqualSign => TAG |
| { |
| if (this._isExpectingAttribute()) |
| this._setExpectingAttributeValue(); |
| this.tokenType = null; |
| return cursor; |
| } |
| |
| <TAG> Identifier |
| { |
| if (this._condition.parseCondition === this._parseConditions.SCRIPT) { |
| // Fall through if expecting attributes. |
| this.tokenType = null; |
| return cursor; |
| } |
| |
| if (this._condition.parseCondition === this._parseConditions.INITIAL) { |
| this.tokenType = "html-tag"; |
| this._setExpectingAttribute(); |
| var token = this._line.substring(cursorOnEnter, cursor); |
| if (token === "a") |
| this._condition.parseCondition |= this._parseConditions.A_NODE; |
| else if (this._condition.parseCondition & this._parseConditions.A_NODE) |
| this._condition.parseCondition ^= this._parseConditions.A_NODE; |
| } else if (this._isExpectingAttribute()) { |
| var token = this._line.substring(cursorOnEnter, cursor); |
| if (token === "href" || token === "src") |
| this._condition.parseCondition |= this._parseConditions.LINKIFY; |
| else if (this._condition.parseCondition |= this._parseConditions.LINKIFY) |
| this._condition.parseCondition ^= this._parseConditions.LINKIFY; |
| this.tokenType = "html-attribute-name"; |
| } else if (this._isExpectingAttributeValue()) |
| this.tokenType = this._attrValueTokenType(); |
| else |
| this.tokenType = null; |
| return cursor; |
| } |
| <*> [^] { this.tokenType = null; return cursor; } |
| */ |
| } |
| } |
| } |
| |
| WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype; |