| /* |
| * Copyright (C) 2019 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| * THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
// This tree builder attempts to match input text to output DOM nodes.
// It therefore does not perform full HTML5 tree construction, such as
// implicitly closing specific HTML parent nodes depending on the node
// currently being parsed; it only does basic implicit closing. In general
// this tries to be a whitespace reformatter for the input text, and does
// not generate the final HTML tree that a browser would generate.
//
// When run with the XML option, all HTML specific cases are disabled.
| |
// Builds a lightweight tree of nodes from a stream of parser nodes (open tags,
// close tags and simple nodes such as text or comments). Usage is:
// begin(), then pushParserNode() for every parser node, then end(); the
// resulting top-level nodes are available through the `dom` getter.
HTMLTreeBuilderFormatter = class HTMLTreeBuilderFormatter
{
    // isXML: when truthy, tag names are matched case-sensitively and none of
    // the HTML-specific implicit-closing / childless-tag rules are applied.
    constructor({isXML} = {})
    {
        this._isXML = !!isXML;

        // Start from an empty state so that pushParserNode() / end() do not
        // throw when a caller forgets to call begin() first.
        this._dom = [];
        this._stackOfOpenElements = [];
    }

    // Public

    // Array of top-level nodes built so far.
    get dom() { return this._dom; }

    // Resets the builder so a new document can be pushed.
    begin()
    {
        this._dom = [];
        this._stackOfOpenElements = [];
    }

    // Adds one parser node to the tree, either at the top level (no element
    // is currently open) or inside the innermost open element.
    pushParserNode(parserNode)
    {
        let containerNode = this._stackOfOpenElements.lastValue;
        if (!containerNode)
            this._pushParserNodeTopLevel(parserNode);
        else
            this._pushParserNodeStack(parserNode, containerNode);
    }

    // Marks every element that is still open as implicitly closed.
    // The stack itself is left untouched.
    end()
    {
        for (let node of this._stackOfOpenElements)
            node.implicitClose = true;
    }

    // Private

    // Handles a parser node while no element is open.
    _pushParserNodeTopLevel(parserNode)
    {
        if (parserNode.type === HTMLParser.NodeType.OpenTag) {
            let node = this._buildDOMNodeFromOpenTag(parserNode);
            this._dom.push(node);
            if (!this._isEmptyNode(parserNode, node))
                this._stackOfOpenElements.push(node);
            return;
        }

        // A close tag with nothing open cannot match anything.
        if (parserNode.type === HTMLParser.NodeType.CloseTag) {
            let errorNode = this._buildErrorNodeFromCloseTag(parserNode);
            this._dom.push(errorNode);
            return;
        }

        let node = this._buildSimpleNodeFromParserNode(parserNode);
        this._dom.push(node);
    }

    // Handles a parser node while `containerNode` is the innermost open element.
    _pushParserNodeStack(parserNode, containerNode)
    {
        if (parserNode.type === HTMLParser.NodeType.OpenTag) {
            let node = this._buildDOMNodeFromOpenTag(parserNode);
            let childrenArray = containerNode.children;
            if (!this._isXML) {
                // Implicit closing may pop elements (possibly all of them), so the
                // container has to be looked up again afterwards.
                this._implicitlyCloseHTMLNodesForOpenTag(parserNode, node);
                containerNode = this._stackOfOpenElements.lastValue;
                childrenArray = containerNode ? containerNode.children : this._dom;
            }
            childrenArray.push(node);
            if (!this._isEmptyNode(parserNode, node))
                this._stackOfOpenElements.push(node);
            return;
        }

        if (parserNode.type === HTMLParser.NodeType.CloseTag) {
            let tagName = this._isXML ? parserNode.name : parserNode.name.toLowerCase();
            let matchingOpenTagIndex = this._indexOfStackNodeMatchingTagNames([tagName]);

            // Found a matching tag, implicitly-close nodes.
            if (matchingOpenTagIndex !== -1) {
                // Everything above the matching element is closed implicitly...
                let nodesToPop = this._stackOfOpenElements.length - matchingOpenTagIndex;
                for (let i = 0; i < nodesToPop - 1; ++i) {
                    let implicitlyClosingNode = this._stackOfOpenElements.pop();
                    implicitlyClosingNode.implicitClose = true;
                }

                // ...and the matching element itself is closed by this tag.
                let closingNode = this._stackOfOpenElements.pop();
                if (parserNode.pos) {
                    closingNode.closeTagPos = parserNode.pos;
                    closingNode.closeTagName = parserNode.name;
                }
                return;
            }

            // Did not find a matching tag to close.
            // Treat this as an error text node.
            let errorNode = this._buildErrorNodeFromCloseTag(parserNode);
            containerNode.children.push(errorNode);
            return;
        }

        let node = this._buildSimpleNodeFromParserNode(parserNode);
        containerNode.children.push(node);
    }

    // Applies the HTML rules under which opening `node` implicitly closes
    // elements that are currently open. Self-closed tags close nothing.
    _implicitlyCloseHTMLNodesForOpenTag(parserNode, node)
    {
        if (parserNode.closed)
            return;

        switch (node.lowercaseName) {
        // <body> closes <head>.
        case "body":
            this._implicitlyCloseTagNamesInsideParentTagNames(["head"]);
            break;

        // Inside <select>.
        case "option":
            this._implicitlyCloseTagNamesInsideParentTagNames(["option"], ["select"]);
            break;
        case "optgroup": {
            let didClose = this._implicitlyCloseTagNamesInsideParentTagNames(["optgroup"], ["select"]);
            if (!didClose)
                this._implicitlyCloseTagNamesInsideParentTagNames(["option"], ["select"]);
            break;
        }

        // Inside <ol>/<ul>.
        case "li":
            this._implicitlyCloseTagNamesInsideParentTagNames(["li"], ["ol", "ul"]);
            break;

        // Inside <dl>.
        case "dd":
        case "dt":
            this._implicitlyCloseTagNamesInsideParentTagNames(["dd", "dt"], ["dl"]);
            break;

        // Inside <table>.
        case "tr": {
            let didClose = this._implicitlyCloseTagNamesInsideParentTagNames(["tr"], ["table"]);
            if (!didClose)
                this._implicitlyCloseTagNamesInsideParentTagNames(["td", "th"], ["table"]);
            break;
        }
        case "td":
        case "th":
            this._implicitlyCloseTagNamesInsideParentTagNames(["td", "th"], ["table"]);
            break;

        // A new table section closes whichever sibling section is still open
        // (for example a second <tbody>, or <tfoot> directly after <thead>).
        // Without an open section, fall back to closing a <tr> that sits
        // directly inside the <table>.
        case "thead":
        case "tbody":
        case "tfoot": {
            let didClose = this._implicitlyCloseTagNamesInsideParentTagNames(["thead", "tbody", "tfoot"], ["table"]);
            if (!didClose)
                this._implicitlyCloseTagNamesInsideParentTagNames(["tr"], ["table"]);
            break;
        }
        case "colgroup":
            this._implicitlyCloseTagNamesInsideParentTagNames(["colgroup"], ["table"]);
            break;

        // Nodes that implicitly close a <p>. Normally this is only in <body> but we simplify to always.
        // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
        case "address":
        case "article":
        case "aside":
        case "blockquote":
        case "center":
        case "details":
        case "dialog":
        case "dir":
        case "div":
        case "dl":
        case "fieldset":
        case "figcaption":
        case "figure":
        case "footer":
        case "form":
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6":
        case "header":
        case "hgroup":
        case "hr":
        case "listing":
        case "main":
        case "menu":
        case "nav":
        case "ol":
        case "p":
        case "plaintext":
        case "pre":
        case "section":
        case "summary":
        case "table":
        case "ul":
        case "xmp":
            this._implicitlyCloseTagNamesInsideParentTagNames(["p"]);
            break;
        }
    }

    // Pops the innermost open element whose lowercase name is in `tagNames`,
    // together with everything opened inside it, marking all of them as
    // implicitly closed. When `containerScopeTagNames` is given, nothing is
    // closed if one of those containers was opened after the candidate.
    // Returns true if anything was closed, false otherwise.
    _implicitlyCloseTagNamesInsideParentTagNames(tagNames, containerScopeTagNames)
    {
        console.assert(!this._isXML, "Implicitly closing only happens in HTML. Also, names are compared case insensitively which would be invalid for XML.");

        let existingOpenTagIndex = this._indexOfStackNodeMatchingTagNames(tagNames);
        if (existingOpenTagIndex === -1)
            return false;

        // Disallow implicitly closing beyond the container tag boundary.
        if (containerScopeTagNames) {
            for (let i = existingOpenTagIndex + 1; i < this._stackOfOpenElements.length; ++i) {
                let stackNode = this._stackOfOpenElements[i];
                if (containerScopeTagNames.includes(stackNode.lowercaseName))
                    return false;
            }
        }

        // Implicitly close tags.
        let nodesToPop = this._stackOfOpenElements.length - existingOpenTagIndex;
        for (let i = 0; i < nodesToPop; ++i) {
            let implicitlyClosingNode = this._stackOfOpenElements.pop();
            implicitlyClosingNode.implicitClose = true;
        }

        return true;
    }

    // Returns the index of the innermost open element whose name is in
    // `tagNames` (exact name for XML, lowercase name for HTML), or -1.
    _indexOfStackNodeMatchingTagNames(tagNames)
    {
        for (let i = this._stackOfOpenElements.length - 1; i >= 0; --i) {
            let stackNode = this._stackOfOpenElements[i];
            let name = this._isXML ? stackNode.name : stackNode.lowercaseName;
            if (tagNames.includes(name))
                return i;
        }

        return -1;
    }

    // Whether the element can never receive children: it was self-closed in
    // the source, or (HTML only) it is listed in TagNamesWithoutChildren.
    _isEmptyNode(parserNode, node)
    {
        if (parserNode.closed)
            return true;

        if (!this._isXML && HTMLTreeBuilderFormatter.TagNamesWithoutChildren.has(node.lowercaseName))
            return true;

        return false;
    }

    // Creates the element node for an open tag.
    _buildDOMNodeFromOpenTag(parserNode)
    {
        console.assert(parserNode.type === HTMLParser.NodeType.OpenTag);

        return {
            type: HTMLTreeBuilderFormatter.NodeType.Node,
            name: parserNode.name,
            lowercaseName: parserNode.name.toLowerCase(),
            children: [],
            attributes: parserNode.attributes,
            pos: parserNode.pos,
            selfClose: parserNode.closed,
            implicitClose: false,
        };
    }

    // Creates the error node used for a close tag that matches no open element.
    _buildErrorNodeFromCloseTag(parserNode)
    {
        console.assert(parserNode.type === HTMLParser.NodeType.CloseTag);

        return {
            type: HTMLTreeBuilderFormatter.NodeType.Error,
            raw: "</" + parserNode.name + ">",
            pos: parserNode.pos,
        };
    }

    // Returns the parser node itself for use as a tree node.
    // Note that the ErrorText conversion mutates the passed-in node.
    _buildSimpleNodeFromParserNode(parserNode)
    {
        // Pass ErrorText through as Text.
        if (parserNode.type === HTMLParser.NodeType.ErrorText)
            parserNode.type = HTMLParser.NodeType.Text;

        // Pass these nodes right through: Text, Comment, Doctype, CData
        // NOTE(review): the assertion compares against HTMLTreeBuilderFormatter.NodeType, which
        // presumes HTMLParser.NodeType uses the same values for these kinds - confirm against HTMLParser.
        console.assert(parserNode.type === HTMLTreeBuilderFormatter.NodeType.Text || parserNode.type === HTMLTreeBuilderFormatter.NodeType.Comment || parserNode.type === HTMLTreeBuilderFormatter.NodeType.Doctype || parserNode.type === HTMLTreeBuilderFormatter.NodeType.CData);
        console.assert("data" in parserNode);

        return parserNode;
    }
};
| |
// Lowercase HTML tag names that never receive children: an open tag with one
// of these names is added to the tree but not kept open as a container.
// <canvas> is intentionally not listed; it is not a void element and may
// contain fallback content followed by a </canvas> close tag.
HTMLTreeBuilderFormatter.TagNamesWithoutChildren = new Set([
    "area",
    "base",
    "basefont",
    "br",
    "col",
    "command",
    "embed",
    "frame",
    "hr",
    "img",
    "input",
    "keygen",
    "link",
    "menuitem",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
]);
| |
// Values of the `type` property on nodes in the tree built by HTMLTreeBuilderFormatter.
HTMLTreeBuilderFormatter.NodeType = {
    // Simple nodes handed through from the parser.
    Text: "text",

    // Element created from an open tag; carries `children` and `attributes`.
    Node: "node",

    // More simple nodes handed through from the parser.
    Comment: "comment",
    Doctype: "doctype",
    CData: "cdata",

    // Close tag that did not match any open element; carries the `raw` tag text.
    Error: "error",
};