// Source/WebInspectorUI/UserInterface/Workers/Formatter/HTMLParser.js - WebKit - Git at Google

 /*
  * Copyright (C) 2019 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */

 HTMLParser = class HTMLParser {

     // Public

     parseDocument(sourceText, treeBuilder, {isXML} = {})
     {
         console.assert(typeof sourceText === "string");
         console.assert(treeBuilder);
         console.assert(treeBuilder.pushParserNode);

         this._treeBuilder = treeBuilder;

         this._pos = 0;
         this._mode = HTMLParser.Mode.Data;
         this._data = sourceText;
         this._bogusCommentOpener = null;
         this._isXML = !!isXML;

         if (this._treeBuilder.begin)
             this._treeBuilder.begin();

         while (this._pos < this._data.length)
             this._parse();

         if (this._treeBuilder.end)
             this._treeBuilder.end();
     }

     // Private

     _isEOF()
     {
         return this._pos === this._data.length;
     }

     _peek(n = 1)
     {
         return this._data.substring(this._pos, this._pos + n);
     }

     _peekCharacterRegex(regex)
     {
         return regex.test(this._data.charAt(this._pos));
     }

     _peekString(str)
     {
         for (let i = 0; i < str.length; ++i) {
             let c = str[i];
             if (this._data.charAt(this._pos + i) !== c)
                 return false;
         }

         return true;
     }

     _peekCaseInsensitiveString(str)
     {
         console.assert(str.toLowerCase() === str, "String should be passed in as lowercase.");

         for (let i = 0; i < str.length; ++i) {
             let d = this._data.charAt(this._pos + i);
             if (!d)
                 return false;
             let c = str[i];
             if (d.toLowerCase() !== c)
                 return false;
         }

         return true;
     }

     _consumeRegex(regex)
     {
         let startIndex = this._pos;
         while (regex.test(this._data.charAt(this._pos)))
             this._pos++;

         return this._data.substring(startIndex, this._pos);
     }

     _consumeWhitespace()
     {
         return this._consumeRegex(/\s/);
     }

     _consumeUntilString(str, newMode)
     {
         let index = this._data.indexOf(str, this._pos);
         if (index === -1) {
             let startIndex = this._pos;
             this._pos = this._data.length;
             if (newMode)
                 this._mode = newMode;
             return this._data.substring(startIndex, this._data.length);
         }

         let startIndex = this._pos;
         this._pos = index + str.length;
         if (newMode)
             this._mode = newMode;
         return this._data.substring(startIndex, index);
     }

     _consumeDoubleQuotedString()
     {
         console.assert(this._peekString(`"`));
         this._pos++;
         let string = this._consumeUntilString(`"`);
         return string;
     }

     _consumeSingleQuotedString()
     {
         console.assert(this._peekString(`'`));
         this._pos++;
         let string = this._consumeUntilString(`'`);
         return string;
     }

     // Parser
     // This is a crude implementation of HTML tokenization:
     // https://html.spec.whatwg.org/multipage/parsing.html

     _parse()
     {
         switch (this._mode) {
         case HTMLParser.Mode.Data:
             return this._parseData();
         case HTMLParser.Mode.ScriptData:
             return this._parseScriptData();
         case HTMLParser.Mode.TagOpen:
             return this._parseTagOpen();
         case HTMLParser.Mode.Attr:
             return this._parseAttr();
         case HTMLParser.Mode.CData:
             return this._parseCData();
         case HTMLParser.Mode.Doctype:
             return this._parseDoctype();
         case HTMLParser.Mode.Comment:
             return this._parseComment();
         case HTMLParser.Mode.BogusComment:
             return this._parseBogusComment();
         }

         console.assert();
         throw "Missing parser mode";
     }

     _parseData()
     {
         let startPos = this._pos;
         let text = this._consumeUntilString("<", HTMLParser.Mode.TagOpen);
         if (text)
             this._push({type: HTMLParser.NodeType.Text, data: text, pos: startPos});

         if (this._isEOF() && this._data.endsWith("<"))
             this._handleEOF(this._pos - 1);
     }

     _parseScriptData()
     {
         let startPos = this._pos;
         let scriptText = "";

         // Parse as text until </script>.
         while (true) {
             scriptText += this._consumeUntilString("<");
             if (this._peekCaseInsensitiveString("/script>")) {
                 this._pos += "/script>".length;
                 this._mode = HTMLParser.Mode.Data;
                 break;
             }
             if (this._handleEOF(startPos))
                 return;
             scriptText += "<";
         }

         if (scriptText)
             this._push({type: HTMLParser.NodeType.Text, data: scriptText, pos: startPos});
         this._push({type: HTMLParser.NodeType.CloseTag, name: "script", pos: startPos + scriptText.length});
     }

     _parseTagOpen()
     {
         // |<tag
         this._currentTagStartPos = this._pos - 1;

         if (this._peekString("!")) {
             // Comment.
             if (this._peekString("!--")) {
                 this._pos += "!--".length;
                 this._mode = HTMLParser.Mode.Comment;
                 this._handleEOF(this._currentTagStartPos);
                 return;
             }

             // DOCTYPE.
             if (this._peekCaseInsensitiveString("!doctype")) {
                 let startPos = this._pos;
                 this._pos += "!DOCTYPE".length;
                 this._doctypeRaw = this._data.substring(startPos, this._pos);
                 this._mode = HTMLParser.Mode.Doctype;
                 this._handleEOF(this._currentTagStartPos);
                 return;
             }

             // CDATA.
             if (this._peekString("![CDATA[")) {
                 this._pos += "![CDATA[".length;
                 this._mode = HTMLParser.Mode.CData;
                 this._handleEOF(this._currentTagStartPos);
                 return;
             }

             // Bogus Comment.
             this._pos++;
             this._mode = HTMLParser.Mode.BogusComment;
             this._handleEOF(this._currentTagStartPos);
             return;
         }

         if (this._peekString("?")) {
             // Bogus Comment.
             this._pos++;
             this._mode = HTMLParser.Mode.BogusComment;
             this._bogusCommentOpener = "<?";
             this._handleEOF(this._currentTagStartPos);
             return;
         }

         if (this._peekString("/")) {
             // End Tag.
             this._pos++;
             let text = this._consumeUntilString(">", HTMLParser.Mode.Data);
             this._push({type: HTMLParser.NodeType.CloseTag, name: text, pos: this._currentTagStartPos});
             return;
         }

         // ASCII - Open Tag
         if (this._peekCharacterRegex(/[a-z]/i)) {
             let text = this._consumeRegex(/[^\s/>]+/);
             if (text) {
                 if (this._peekCharacterRegex(/\s/)) {
                     this._currentTagName = text;
                     this._currentTagAttributes = [];
                     this._mode = HTMLParser.Mode.Attr;
                     return;
                 }

                 if (this._peekString("/>")) {
                     this._pos += "/>".length;
                     this._mode = HTMLParser.Mode.Data;
                     this._push({type: HTMLParser.NodeType.OpenTag, name: text, closed: true, pos: this._currentTagStartPos});
                     return;
                 }

                 if (this._peekString(">")) {
                     this._pos++;
                     this._mode = HTMLParser.Mode.Data;
                     this._push({type: HTMLParser.NodeType.OpenTag, name: text, closed: false, pos: this._currentTagStartPos});
                     return;
                 }

                 // End of document. Output any remaining data as error text.
                 console.assert(this._isEOF());
                 this._push({type: HTMLParser.NodeType.ErrorText, data: "<" + text, pos: this._currentTagStartPos});
                 return;
             }
         }

         // Anything else, treat as text.
         this._push({type: HTMLParser.NodeType.Text, data: "<", pos: this._currentTagStartPos});
         this._mode = HTMLParser.Mode.Data;
     }

     _parseAttr()
     {
         this._consumeWhitespace();

         if (this._peekString("/>")) {
             this._pos += "/>".length;
             this._mode = HTMLParser.Mode.Data;
             this._push({type: HTMLParser.NodeType.OpenTag, name: this._currentTagName, closed: true, attributes: this._currentTagAttributes, pos: this._currentTagStartPos});
             return;
         }

         if (this._peekString(">")) {
             this._pos++;
             this._mode = HTMLParser.Mode.Data;
             this._push({type: HTMLParser.NodeType.OpenTag, name: this._currentTagName, closed: false, attributes: this._currentTagAttributes, pos: this._currentTagStartPos});
             return;
         }

         // <tag |attr
         let attributeNameStartPos = this._pos;

         let attributeName = this._consumeRegex(/[^\s=/>]+/);
         // console.assert(attributeName.length > 0, "Unexpected empty attribute name");
         if (this._peekString("/") || this._peekString(">")) {
             if (attributeName)
                 this._pushAttribute({name: attributeName, value: undefined, namePos: attributeNameStartPos});
             return;
         }

         this._consumeWhitespace();

         if (this._peekString("=")) {
             this._pos++;

             // <tag attr=|value
             let attributeValueStartPos = this._pos;

             this._consumeWhitespace();

             if (this._peekString(`"`)) {
                 let attributeValue = this._consumeDoubleQuotedString();
                 this._pushAttribute({name: attributeName, value: attributeValue, quote: HTMLParser.AttrQuoteType.Double, namePos: attributeNameStartPos, valuePos: attributeValueStartPos});
                 return;
             }

             if (this._peekString(`'`)) {
                 let attributeValue = this._consumeSingleQuotedString();
                 this._pushAttribute({name: attributeName, value: attributeValue, quote: HTMLParser.AttrQuoteType.Single, namePos: attributeNameStartPos, valuePos: attributeValueStartPos});
                 return;
             }

             if (this._peekString(">")) {
                 this._pos++;
                 this._mode = HTMLParser.Mode.Data;
                 this._push({type: HTMLParser.NodeType.OpenTag, name: this._currentTagName, closed: false, attributes: this._currentTagAttributes, pos: this._currentTagStartPos});
                 return;
             }

             let whitespace = this._consumeWhitespace();
             if (whitespace) {
                 this._pushAttribute({name: attributeName, value: undefined, quote: HTMLParser.AttrQuoteType.None, namePos: attributeNameStartPos});
                 return;
             }

             let attributeValue = this._consumeRegex(/[^\s=/>]+/);
             this._pushAttribute({name: attributeName, value: attributeValue, quote: HTMLParser.AttrQuoteType.None, namePos: attributeNameStartPos, valuePos: attributeValueStartPos});
             return;
         }

         if (!this._isEOF()) {
             this._pushAttribute({name: attributeName, value: undefined, quote: HTMLParser.AttrQuoteType.None, namePos: attributeNameStartPos});
             return;
         }

         // End of document. Treat everything up to now as error text.
         console.assert(this._isEOF());
         this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
         return;
     }

     _parseComment()
     {
         let text = this._consumeUntilString("-->", HTMLParser.Mode.Data);
         if (this._isEOF() && !this._data.endsWith("-->")) {
             this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
             return;
         }

         let closePos = this._pos - "-->".length;
         this._push({type: HTMLParser.NodeType.Comment, data: text, pos: this._currentTagStartPos, closePos});
     }

     _parseBogusComment()
     {
         let text = this._consumeUntilString(">", HTMLParser.Mode.Data);
         if (this._isEOF() && !this._data.endsWith(">")) {
             this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
             return;
         }

         let closePos = this._pos - ">".length;
         this._push({type: HTMLParser.NodeType.Comment, data: text, opener: this._bogusCommentOpener || "", pos: this._currentTagStartPos, closePos});
         this._bogusCommentOpener = null;
     }

     _parseDoctype()
     {
         let text = this._consumeUntilString(">", HTMLParser.Mode.Data);
         if (this._isEOF() && !this._data.endsWith(">")) {
             this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
             return;
         }

         let closePos = this._pos - ">".length;
         this._push({type: HTMLParser.NodeType.Doctype, data: text, raw: this._doctypeRaw, pos: this._currentTagStartPos, closePos});
         this._doctypeRaw = null;
     }

     _parseCData()
     {
         let text = this._consumeUntilString("]]>", HTMLParser.Mode.Data);
         if (this._isEOF() && !this._data.endsWith("]]>")) {
             this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
             return;
         }

         let closePos = this._pos - "]]>".length;
         this._push({type: HTMLParser.NodeType.CData, data: text, pos: this._currentTagStartPos, closePos});
     }

     _pushAttribute(attr)
     {
         this._currentTagAttributes.push(attr);
         this._handleEOF(this._currentTagStartPos);
     }

     _handleEOF(lastPosition)
     {
         if (!this._isEOF())
             return false;

         // End of document. Treat everything from the last position as error text.
         this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(lastPosition), pos: lastPosition});
         return true;
     }

     _push(node)
     {
         // Custom mode for some elements.
         if (node.type === HTMLParser.NodeType.OpenTag) {
             if (!this._isXML && node.name.toLowerCase() === "script")
                 this._mode = HTMLParser.Mode.ScriptData;
         }

         this._treeBuilder.pushParserNode(node);
     }
 };

 // Tokenizer states. HTMLParser.prototype._parse() dispatches on `this._mode` using these values:
 //   Data:         plain text up to the next "<".
 //   TagOpen:      directly after a "<".
 //   ScriptData:   raw text after a <script> open tag (non-XML documents only), up to "</script>".
 //   Attr:         inside an open tag, after the tag name.
 //   CData:        after "<![CDATA[", up to "]]>".
 //   Doctype:      after "<!DOCTYPE" (any casing), up to ">".
 //   Comment:      after "<!--", up to "-->".
 //   BogusComment: after any other "<!" or after "<?", up to ">".
 HTMLParser.Mode = {
     Data: "data",
     TagOpen: "tag-open",
     ScriptData: "script-data",
     Attr: "attr",
     CData: "cdata",
     Doctype: "doctype",
     Comment: "comment",
     BogusComment: "bogus-comment",
 };

 // Values of the `type` property on the nodes HTMLParser passes to `treeBuilder.pushParserNode()`.
 // ErrorText is used for markup left unterminated at the end of the document;
 // Comment is used for both "<!-- -->" comments and bogus comments.
 HTMLParser.NodeType = {
     Text: "text",
     ErrorText: "error-text",
     OpenTag: "open-tag",
     CloseTag: "close-tag",
     Comment: "comment",
     Doctype: "doctype",
     CData: "cdata",
 };

 // Values of the `quote` property on attributes collected by HTMLParser.prototype._parseAttr():
 // None for an unquoted value or an attribute without a value, Double for "...", Single for '...'.
 HTMLParser.AttrQuoteType = {
     None: "none",
     Double: "double",
     Single: "single",
 };
	/*
	* Copyright (C) 2019 Apple Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
	* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	* THE POSSIBILITY OF SUCH DAMAGE.
	*/

	// HTMLParser is a crude, forgiving tokenizer for HTML (and, with `isXML`, XML) source text.
	// It scans the text once and reports every token it recognizes to a tree builder through
	// `treeBuilder.pushParserNode(node)`. Every node has a `type` (HTMLParser.NodeType) and a
	// `pos` (index into the source text); the remaining properties depend on the type:
	//   Text / ErrorText: `data`
	//   OpenTag:          `name`, `closed` (true for "/>"), plus `attributes` when the tag went through attribute parsing
	//   CloseTag:         `name`
	//   Comment:          `data`, `closePos`, plus `opener` for bogus comments
	//   Doctype:          `data`, `raw`, `closePos`
	//   CData:            `data`, `closePos`
	// NOTE: the class is assigned to an undeclared name, which only works in non-strict code.
	HTMLParser = class HTMLParser {

	    // Public

	    // Tokenizes `sourceText` from start to end.
	    //   sourceText:  the string to tokenize.
	    //   treeBuilder: object with a required `pushParserNode(node)` method and optional
	    //                `begin()` / `end()` methods called before / after tokenizing.
	    //   isXML:       when truthy, <script> content is tokenized like any other markup
	    //                instead of being treated as raw text up to "</script>".
	    parseDocument(sourceText, treeBuilder, {isXML} = {})
	    {
	        console.assert(typeof sourceText === "string");
	        console.assert(treeBuilder);
	        console.assert(treeBuilder.pushParserNode);

	        this._treeBuilder = treeBuilder;

	        this._pos = 0;
	        this._mode = HTMLParser.Mode.Data;
	        this._data = sourceText;
	        this._bogusCommentOpener = null;
	        this._isXML = !!isXML;

	        if (this._treeBuilder.begin)
	            this._treeBuilder.begin();

	        // Every _parse() step has to advance `_pos` or switch `_mode`; otherwise this loop cannot terminate.
	        while (this._pos < this._data.length)
	            this._parse();

	        if (this._treeBuilder.end)
	            this._treeBuilder.end();
	    }

	    // Private

	    // True once the cursor has reached the end of the source text.
	    _isEOF()
	    {
	        return this._pos === this._data.length;
	    }

	    // Returns up to the next `n` characters without consuming them.
	    _peek(n = 1)
	    {
	        return this._data.substring(this._pos, this._pos + n);
	    }

	    // Tests the single character at the cursor (the empty string at EOF) against `regex`.
	    _peekCharacterRegex(regex)
	    {
	        return regex.test(this._data.charAt(this._pos));
	    }

	    // True if the text at the cursor starts with `str` (case-sensitive). Nothing is consumed.
	    _peekString(str)
	    {
	        for (let i = 0; i < str.length; ++i) {
	            if (this._data.charAt(this._pos + i) !== str[i])
	                return false;
	        }

	        return true;
	    }

	    // Like _peekString, but compares the source text lowercased. `str` must already be lowercase.
	    _peekCaseInsensitiveString(str)
	    {
	        console.assert(str.toLowerCase() === str, "String should be passed in as lowercase.");

	        for (let i = 0; i < str.length; ++i) {
	            let d = this._data.charAt(this._pos + i);
	            // charAt() yields "" past the end of the text.
	            if (!d)
	                return false;
	            if (d.toLowerCase() !== str[i])
	                return false;
	        }

	        return true;
	    }

	    // Consumes characters one at a time for as long as each single character matches `regex`,
	    // and returns the consumed text (possibly empty).
	    _consumeRegex(regex)
	    {
	        let startIndex = this._pos;
	        while (regex.test(this._data.charAt(this._pos)))
	            this._pos++;

	        return this._data.substring(startIndex, this._pos);
	    }

	    // Consumes and returns a run of whitespace (possibly empty).
	    _consumeWhitespace()
	    {
	        return this._consumeRegex(/\s/);
	    }

	    // Consumes up to and including the next occurrence of `str` and returns the text before it.
	    // If `str` does not occur, consumes and returns the rest of the document.
	    // In both cases the parser switches to `newMode` when one is provided.
	    _consumeUntilString(str, newMode)
	    {
	        let startIndex = this._pos;
	        let index = this._data.indexOf(str, startIndex);
	        let found = index !== -1;
	        let endIndex = found ? index : this._data.length;

	        this._pos = found ? index + str.length : this._data.length;
	        if (newMode)
	            this._mode = newMode;

	        return this._data.substring(startIndex, endIndex);
	    }

	    // With the cursor on a `"`, consumes through the closing `"` (or to EOF) and returns the text in between.
	    _consumeDoubleQuotedString()
	    {
	        console.assert(this._peekString(`"`));
	        this._pos++;
	        return this._consumeUntilString(`"`);
	    }

	    // With the cursor on a `'`, consumes through the closing `'` (or to EOF) and returns the text in between.
	    _consumeSingleQuotedString()
	    {
	        console.assert(this._peekString(`'`));
	        this._pos++;
	        return this._consumeUntilString(`'`);
	    }

	    // Parser
	    // This is a crude implementation of HTML tokenization:
	    // https://html.spec.whatwg.org/multipage/parsing.html

	    // Runs one step of the handler for the current mode.
	    _parse()
	    {
	        switch (this._mode) {
	        case HTMLParser.Mode.Data:
	            return this._parseData();
	        case HTMLParser.Mode.ScriptData:
	            return this._parseScriptData();
	        case HTMLParser.Mode.TagOpen:
	            return this._parseTagOpen();
	        case HTMLParser.Mode.Attr:
	            return this._parseAttr();
	        case HTMLParser.Mode.CData:
	            return this._parseCData();
	        case HTMLParser.Mode.Doctype:
	            return this._parseDoctype();
	        case HTMLParser.Mode.Comment:
	            return this._parseComment();
	        case HTMLParser.Mode.BogusComment:
	            return this._parseBogusComment();
	        }

	        console.assert();
	        throw "Missing parser mode";
	    }

	    // Emits the text up to the next "<" as a Text node and switches to TagOpen.
	    _parseData()
	    {
	        let startPos = this._pos;
	        let text = this._consumeUntilString("<", HTMLParser.Mode.TagOpen);
	        if (text)
	            this._push({type: HTMLParser.NodeType.Text, data: text, pos: startPos});

	        // A "<" as the very last character can never become a tag; report it as error text.
	        if (this._isEOF() && this._data.endsWith("<"))
	            this._handleEOF(this._pos - 1);
	    }

	    // Emits everything up to a case-insensitive "</script>" as one Text node followed by a
	    // CloseTag node. If the document ends first, the remaining text is emitted as ErrorText.
	    _parseScriptData()
	    {
	        let startPos = this._pos;
	        let scriptText = "";

	        // Parse as text until </script>.
	        while (true) {
	            scriptText += this._consumeUntilString("<");
	            if (this._peekCaseInsensitiveString("/script>")) {
	                this._pos += "/script>".length;
	                this._mode = HTMLParser.Mode.Data;
	                break;
	            }
	            if (this._handleEOF(startPos))
	                return;
	            // This "<" was part of the script; keep it and continue searching.
	            scriptText += "<";
	        }

	        if (scriptText)
	            this._push({type: HTMLParser.NodeType.Text, data: scriptText, pos: startPos});
	        this._push({type: HTMLParser.NodeType.CloseTag, name: "script", pos: startPos + scriptText.length});
	    }

	    // Decides what kind of markup follows a "<" that was just consumed.
	    _parseTagOpen()
	    {
	        // |<tag
	        this._currentTagStartPos = this._pos - 1;

	        if (this._peekString("!")) {
	            // Comment.
	            if (this._peekString("!--")) {
	                this._pos += "!--".length;
	                this._mode = HTMLParser.Mode.Comment;
	                this._handleEOF(this._currentTagStartPos);
	                return;
	            }

	            // DOCTYPE.
	            if (this._peekCaseInsensitiveString("!doctype")) {
	                let startPos = this._pos;
	                this._pos += "!DOCTYPE".length;
	                // Remember the keyword exactly as written (its original casing).
	                this._doctypeRaw = this._data.substring(startPos, this._pos);
	                this._mode = HTMLParser.Mode.Doctype;
	                this._handleEOF(this._currentTagStartPos);
	                return;
	            }

	            // CDATA.
	            if (this._peekString("![CDATA[")) {
	                this._pos += "![CDATA[".length;
	                this._mode = HTMLParser.Mode.CData;
	                this._handleEOF(this._currentTagStartPos);
	                return;
	            }

	            // Bogus Comment.
	            this._pos++;
	            this._mode = HTMLParser.Mode.BogusComment;
	            this._handleEOF(this._currentTagStartPos);
	            return;
	        }

	        if (this._peekString("?")) {
	            // Bogus Comment.
	            this._pos++;
	            this._mode = HTMLParser.Mode.BogusComment;
	            this._bogusCommentOpener = "<?";
	            this._handleEOF(this._currentTagStartPos);
	            return;
	        }

	        if (this._peekString("/")) {
	            // End Tag.
	            this._pos++;
	            let text = this._consumeUntilString(">", HTMLParser.Mode.Data);
	            this._push({type: HTMLParser.NodeType.CloseTag, name: text, pos: this._currentTagStartPos});
	            return;
	        }

	        // ASCII - Open Tag
	        if (this._peekCharacterRegex(/[a-z]/i)) {
	            let text = this._consumeRegex(/[^\s/>]+/);
	            if (text) {
	                // <tag |attr  or  <tag|/attr
	                // A "/" that does not start "/>" is handed to attribute parsing, which skips it.
	                // Previously that case fell through to the end-of-document branch below.
	                let straySolidus = this._peekString("/") && !this._peekString("/>");
	                if (this._peekCharacterRegex(/\s/) || straySolidus) {
	                    this._currentTagName = text;
	                    this._currentTagAttributes = [];
	                    this._mode = HTMLParser.Mode.Attr;
	                    return;
	                }

	                if (this._peekString("/>")) {
	                    this._pos += "/>".length;
	                    // Set Data first: _push() may override the mode (e.g. for <script>).
	                    this._mode = HTMLParser.Mode.Data;
	                    this._push({type: HTMLParser.NodeType.OpenTag, name: text, closed: true, pos: this._currentTagStartPos});
	                    return;
	                }

	                if (this._peekString(">")) {
	                    this._pos++;
	                    this._mode = HTMLParser.Mode.Data;
	                    this._push({type: HTMLParser.NodeType.OpenTag, name: text, closed: false, pos: this._currentTagStartPos});
	                    return;
	                }

	                // End of document. Output any remaining data as error text.
	                console.assert(this._isEOF());
	                this._push({type: HTMLParser.NodeType.ErrorText, data: "<" + text, pos: this._currentTagStartPos});
	                return;
	            }
	        }

	        // Anything else, treat as text.
	        this._push({type: HTMLParser.NodeType.Text, data: "<", pos: this._currentTagStartPos});
	        this._mode = HTMLParser.Mode.Data;
	    }

	    // Parses one step inside an open tag: either the end of the tag ("/>" or ">"), which
	    // emits the OpenTag node, or a single attribute, which is added to the current tag.
	    _parseAttr()
	    {
	        this._consumeWhitespace();

	        if (this._peekString("/>")) {
	            this._pos += "/>".length;
	            this._mode = HTMLParser.Mode.Data;
	            this._push({type: HTMLParser.NodeType.OpenTag, name: this._currentTagName, closed: true, attributes: this._currentTagAttributes, pos: this._currentTagStartPos});
	            return;
	        }

	        if (this._peekString(">")) {
	            this._pos++;
	            this._mode = HTMLParser.Mode.Data;
	            this._push({type: HTMLParser.NodeType.OpenTag, name: this._currentTagName, closed: false, attributes: this._currentTagAttributes, pos: this._currentTagStartPos});
	            return;
	        }

	        // <tag |attr
	        let attributeNameStartPos = this._pos;

	        let attributeName = this._consumeRegex(/[^\s=/>]+/);
	        if (this._peekString("/") || this._peekString(">")) {
	            if (attributeName) {
	                this._pushAttribute({name: attributeName, value: undefined, namePos: attributeNameStartPos});
	                return;
	            }

	            // No name was consumed, so the cursor sits on a "/" that is not part of "/>"
	            // (both "/>" and ">" were handled above). Skip it; returning without consuming
	            // anything would make parseDocument() loop forever on input like `<a b/c>`.
	            if (this._peekString("/")) {
	                this._pos++;
	                this._handleEOF(this._currentTagStartPos);
	            }
	            return;
	        }

	        this._consumeWhitespace();

	        if (this._peekString("=")) {
	            this._pos++;

	            // <tag attr=|value
	            let attributeValueStartPos = this._pos;

	            this._consumeWhitespace();

	            if (this._peekString(`"`)) {
	                let attributeValue = this._consumeDoubleQuotedString();
	                this._pushAttribute({name: attributeName, value: attributeValue, quote: HTMLParser.AttrQuoteType.Double, namePos: attributeNameStartPos, valuePos: attributeValueStartPos});
	                return;
	            }

	            if (this._peekString(`'`)) {
	                let attributeValue = this._consumeSingleQuotedString();
	                this._pushAttribute({name: attributeName, value: attributeValue, quote: HTMLParser.AttrQuoteType.Single, namePos: attributeNameStartPos, valuePos: attributeValueStartPos});
	                return;
	            }

	            // `attr=>` ends the tag; the attribute without a value is not recorded.
	            if (this._peekString(">")) {
	                this._pos++;
	                this._mode = HTMLParser.Mode.Data;
	                this._push({type: HTMLParser.NodeType.OpenTag, name: this._currentTagName, closed: false, attributes: this._currentTagAttributes, pos: this._currentTagStartPos});
	                return;
	            }

	            let whitespace = this._consumeWhitespace();
	            if (whitespace) {
	                this._pushAttribute({name: attributeName, value: undefined, quote: HTMLParser.AttrQuoteType.None, namePos: attributeNameStartPos});
	                return;
	            }

	            // Unquoted value (may be empty when the next character is "=" or "/").
	            let attributeValue = this._consumeRegex(/[^\s=/>]+/);
	            this._pushAttribute({name: attributeName, value: attributeValue, quote: HTMLParser.AttrQuoteType.None, namePos: attributeNameStartPos, valuePos: attributeValueStartPos});
	            return;
	        }

	        // Attribute without a value, followed by more tag content.
	        if (!this._isEOF()) {
	            this._pushAttribute({name: attributeName, value: undefined, quote: HTMLParser.AttrQuoteType.None, namePos: attributeNameStartPos});
	            return;
	        }

	        // End of document. Treat everything up to now as error text.
	        console.assert(this._isEOF());
	        this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
	    }

	    // Emits the comment that ends at the next "-->", or ErrorText if the document ends first.
	    _parseComment()
	    {
	        let text = this._consumeUntilString("-->", HTMLParser.Mode.Data);
	        if (this._isEOF() && !this._data.endsWith("-->")) {
	            this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
	            return;
	        }

	        let closePos = this._pos - "-->".length;
	        this._push({type: HTMLParser.NodeType.Comment, data: text, pos: this._currentTagStartPos, closePos});
	    }

	    // Emits the bogus comment ("<!...>" or "<?...>") that ends at the next ">", or ErrorText
	    // if the document ends first. `opener` is "<?" for "<?...>" and "" otherwise.
	    _parseBogusComment()
	    {
	        let text = this._consumeUntilString(">", HTMLParser.Mode.Data);
	        if (this._isEOF() && !this._data.endsWith(">")) {
	            this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
	            return;
	        }

	        let closePos = this._pos - ">".length;
	        this._push({type: HTMLParser.NodeType.Comment, data: text, opener: this._bogusCommentOpener || "", pos: this._currentTagStartPos, closePos});
	        this._bogusCommentOpener = null;
	    }

	    // Emits the doctype that ends at the next ">", or ErrorText if the document ends first.
	    // `raw` is the "!DOCTYPE" keyword as it was written in the source.
	    _parseDoctype()
	    {
	        let text = this._consumeUntilString(">", HTMLParser.Mode.Data);
	        if (this._isEOF() && !this._data.endsWith(">")) {
	            this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
	            return;
	        }

	        let closePos = this._pos - ">".length;
	        this._push({type: HTMLParser.NodeType.Doctype, data: text, raw: this._doctypeRaw, pos: this._currentTagStartPos, closePos});
	        this._doctypeRaw = null;
	    }

	    // Emits the CDATA section that ends at the next "]]>", or ErrorText if the document ends first.
	    _parseCData()
	    {
	        let text = this._consumeUntilString("]]>", HTMLParser.Mode.Data);
	        if (this._isEOF() && !this._data.endsWith("]]>")) {
	            this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(this._currentTagStartPos), pos: this._currentTagStartPos});
	            return;
	        }

	        let closePos = this._pos - "]]>".length;
	        this._push({type: HTMLParser.NodeType.CData, data: text, pos: this._currentTagStartPos, closePos});
	    }

	    // Records an attribute on the tag being parsed. If that consumed the rest of the
	    // document, the unterminated tag is reported as error text.
	    _pushAttribute(attr)
	    {
	        this._currentTagAttributes.push(attr);
	        this._handleEOF(this._currentTagStartPos);
	    }

	    // At end of document, emits everything from `lastPosition` on as ErrorText and returns true.
	    // Returns false (and emits nothing) otherwise.
	    _handleEOF(lastPosition)
	    {
	        if (!this._isEOF())
	            return false;

	        // End of document. Treat everything from the last position as error text.
	        this._push({type: HTMLParser.NodeType.ErrorText, data: this._data.substring(lastPosition), pos: lastPosition});
	        return true;
	    }

	    // Hands `node` to the tree builder. Callers set the follow-up mode before calling this,
	    // because an OpenTag for <script> (non-XML only) overrides it with ScriptData here.
	    _push(node)
	    {
	        // Custom mode for some elements.
	        if (node.type === HTMLParser.NodeType.OpenTag) {
	            if (!this._isXML && node.name.toLowerCase() === "script")
	                this._mode = HTMLParser.Mode.ScriptData;
	        }

	        this._treeBuilder.pushParserNode(node);
	    }
	};

	// Tokenizer states. HTMLParser.prototype._parse() dispatches on `this._mode` using these values:
	//   Data:         plain text up to the next "<".
	//   TagOpen:      directly after a "<".
	//   ScriptData:   raw text after a <script> open tag (non-XML documents only), up to "</script>".
	//   Attr:         inside an open tag, after the tag name.
	//   CData:        after "<![CDATA[", up to "]]>".
	//   Doctype:      after "<!DOCTYPE" (any casing), up to ">".
	//   Comment:      after "<!--", up to "-->".
	//   BogusComment: after any other "<!" or after "<?", up to ">".
	HTMLParser.Mode = {
	Data: "data",
	TagOpen: "tag-open",
	ScriptData: "script-data",
	Attr: "attr",
	CData: "cdata",
	Doctype: "doctype",
	Comment: "comment",
	BogusComment: "bogus-comment",
	};

	// Values of the `type` property on the nodes HTMLParser passes to `treeBuilder.pushParserNode()`.
	// ErrorText is used for markup left unterminated at the end of the document;
	// Comment is used for both "<!-- -->" comments and bogus comments.
	HTMLParser.NodeType = {
	Text: "text",
	ErrorText: "error-text",
	OpenTag: "open-tag",
	CloseTag: "close-tag",
	Comment: "comment",
	Doctype: "doctype",
	CData: "cdata",
	};

	// Values of the `quote` property on attributes collected by HTMLParser.prototype._parseAttr():
	// None for an unquoted value or an attribute without a value, Double for "...", Single for '...'.
	HTMLParser.AttrQuoteType = {
	None: "none",
	Double: "double",
	Single: "single",
	};