// blob: 4cd3ac970060a173b989835f1d026542b378277e [file] [log] [blame]
/*
* Copyright (C) 2011 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "XMLTreeBuilder.h"
#include "CachedScript.h"
#include "CDATASection.h"
#include "Comment.h"
#include "Document.h"
#include "DocumentFragment.h"
#include "DocumentType.h"
#include "Frame.h"
// FIXME: Why are we including HTML entity information in the XML parser?
#include "HTMLEntitySearch.h"
#include "HTMLEntityTable.h"
#include "NewXMLDocumentParser.h"
#include "ProcessingInstruction.h"
#include "ScriptElement.h"
#include "ScriptSourceCode.h"
#include "XMLNSNames.h"
#include "XMLNames.h"
namespace WebCore {
XMLTreeBuilder::XMLTreeBuilder(NewXMLDocumentParser* parser, Document* document)
: m_document(document)
, m_parser(parser)
, m_isXHTML(false)
, m_sawFirstElement(false)
{
m_currentNodeStack.append(NodeStackItem(document));
}
XMLTreeBuilder::XMLTreeBuilder(NewXMLDocumentParser* parser, DocumentFragment* fragment, Element* parent)
: m_document(fragment->document())
, m_parser(parser)
, m_isXHTML(false)
, m_sawFirstElement(true)
{
NodeStackItem stackItem(fragment);
// Figure out namespaces
Vector<Element*> nodeStack;
while (parent) {
nodeStack.append(parent);
ContainerNode* node = parent->parentNode();
if (!node || !node->isElementNode())
break;
parent = static_cast<Element*>(node);
}
if (nodeStack.isEmpty()) {
m_currentNodeStack.append(stackItem);
return;
}
for (Element* element; !nodeStack.isEmpty(); nodeStack.removeLast()) {
element = nodeStack.last();
if (element->hasAttributes()) {
for (size_t i = 0; i < element->attributeCount(); ++i) {
const Attribute* attribute = element->attributeItem(i);
if (attribute->localName() == xmlnsAtom)
stackItem.setNamespaceURI(attribute->value());
else if (attribute->prefix() == xmlnsAtom)
stackItem.setNamespaceURI(attribute->localName(), attribute->value());
}
}
}
// If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
if (stackItem.namespaceURI().isNull() && !parent->inDocument())
stackItem.setNamespaceURI(parent->namespaceURI());
m_currentNodeStack.append(stackItem);
}
void XMLTreeBuilder::processToken(const AtomicXMLToken& token)
{
switch (token.type()) {
case XMLTokenTypes::Uninitialized:
ASSERT_NOT_REACHED();
break;
case XMLTokenTypes::ProcessingInstruction:
processProcessingInstruction(token);
break;
case XMLTokenTypes::XMLDeclaration:
processXMLDeclaration(token);
break;
case XMLTokenTypes::DOCTYPE:
processDOCTYPE(token);
break;
case XMLTokenTypes::StartTag:
processStartTag(token);
break;
case XMLTokenTypes::EndTag:
processEndTag(token);
break;
case XMLTokenTypes::CDATA:
processCDATA(token);
break;
case XMLTokenTypes::Character:
processCharacter(token);
break;
case XMLTokenTypes::Comment:
processComment(token);
break;
case XMLTokenTypes::Entity:
processEntity(token);
break;
case XMLTokenTypes::EndOfFile:
exitText();
return;
}
}
// Called when parsing is finished: flushes any text that is still buffered
// into a Text node (see exitText()).
void XMLTreeBuilder::finish()
{
exitText();
}
// Pushes |stackItem| onto the stack of open nodes, making its node the new
// insertion point. The item must carry a node.
void XMLTreeBuilder::pushCurrentNode(const NodeStackItem& stackItem)
{
ASSERT(stackItem.node());
m_currentNodeStack.append(stackItem);
// FIXME: is there a maximum DOM depth?
}
void XMLTreeBuilder::popCurrentNode()
{
ASSERT(m_currentNodeStack.size());
m_currentNodeStack.removeLast();
}
void XMLTreeBuilder::closeElement(PassRefPtr<Element> element)
{
element->finishParsingChildren();
ScriptElement* scriptElement = toScriptElement(element.get());
if (scriptElement)
m_parser->processScript(scriptElement);
popCurrentNode();
}
void XMLTreeBuilder::processProcessingInstruction(const AtomicXMLToken& token)
{
if (!failOnText())
return;
// FIXME: fall back if we can't handle the PI ourself.
add(ProcessingInstruction::create(m_document, token.target(), token.data()));
}
// Handles the XML declaration: copies the declared version and standalone
// flag onto the document. If the document rejects either value (reported
// through the ExceptionCode), parsing is stopped.
void XMLTreeBuilder::processXMLDeclaration(const AtomicXMLToken& token)
{
    if (!failOnText())
        return;

    ExceptionCode ec = 0;
    m_document->setXMLVersion(String(token.xmlVersion()), ec);
    if (ec) {
        // Bail out right away. |ec| is not reset between the two setters, so
        // falling through would let this stale error trigger the second check
        // as well, stopping the parser twice and still mutating the document
        // after the abort.
        m_parser->stopParsing();
        return;
    }

    m_document->setXMLStandalone(token.xmlStandalone(), ec);
    if (ec)
        m_parser->stopParsing();

    // FIXME: how should this behave if standalone is not specified?
    // FIXME: set encoding.
}
void XMLTreeBuilder::processDOCTYPE(const AtomicXMLToken& token)
{
DEFINE_STATIC_LOCAL(AtomicString, xhtmlTransitional, ("-//W3C//DTD XHTML 1.0 Transitional//EN"));
DEFINE_STATIC_LOCAL(AtomicString, xhtml11, ("-//W3C//DTD XHTML 1.1//EN"));
DEFINE_STATIC_LOCAL(AtomicString, xhtmlStrict, ("-//W3C//DTD XHTML 1.0 Strict//EN"));
DEFINE_STATIC_LOCAL(AtomicString, xhtmlFrameset, ("-//W3C//DTD XHTML 1.0 Frameset//EN"));
DEFINE_STATIC_LOCAL(AtomicString, xhtmlBasic, ("-//W3C//DTD XHTML Basic 1.0//EN"));
DEFINE_STATIC_LOCAL(AtomicString, xhtmlMathML, ("-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"));
DEFINE_STATIC_LOCAL(AtomicString, xhtmlMathMLSVG, ("-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"));
DEFINE_STATIC_LOCAL(AtomicString, xhtmlMobile, ("-//WAPFORUM//DTD XHTML Mobile 1.0//EN"));
if (!failOnText())
return;
AtomicString publicIdentifier(token.publicIdentifier().data(), token.publicIdentifier().size());
AtomicString systemIdentifier(token.systemIdentifier().data(), token.systemIdentifier().size());
RefPtr<DocumentType> doctype = DocumentType::create(m_document, token.name(), publicIdentifier, systemIdentifier);
m_document->setDocType(doctype);
m_document->parserAddChild(doctype);
if ((publicIdentifier == xhtmlTransitional)
|| (publicIdentifier == xhtml11)
|| (publicIdentifier == xhtmlStrict)
|| (publicIdentifier == xhtmlFrameset)
|| (publicIdentifier == xhtmlBasic)
|| (publicIdentifier == xhtmlMathML)
|| (publicIdentifier == xhtmlMathMLSVG)
|| (publicIdentifier == xhtmlMobile))
m_isXHTML = true;
}
void XMLTreeBuilder::processStartTag(const AtomicXMLToken& token)
{
exitText();
bool isFirstElement = !m_sawFirstElement;
m_sawFirstElement = true;
NodeStackItem top = m_currentNodeStack.last();
processNamespaces(token, top);
QualifiedName qName(token.prefix(), token.name(), top.namespaceForPrefix(token.prefix(), top.namespaceURI()));
RefPtr<Element> newElement = m_document->createElement(qName, true);
processAttributes(token, top, newElement);
newElement->beginParsingChildren();
m_currentNodeStack.last().node()->parserAddChild(newElement.get());
top.setNode(newElement);
pushCurrentNode(top);
if (!newElement->attached())
newElement->attach();
if (isFirstElement && m_document->frame())
m_document->frame()->loader()->dispatchDocumentElementAvailable();
if (token.selfClosing())
closeElement(newElement);
}
void XMLTreeBuilder::processEndTag(const AtomicXMLToken& token)
{
exitText();
RefPtr<ContainerNode> node = m_currentNodeStack.last().node();
if (!node->hasTagName(QualifiedName(token.prefix(), token.name(), m_currentNodeStack.last().namespaceForPrefix(token.prefix(), m_currentNodeStack.last().namespaceURI()))))
m_parser->stopParsing();
closeElement(toElement(node.get()));
}
void XMLTreeBuilder::processCharacter(const AtomicXMLToken& token)
{
appendToText(token.characters().data(), token.characters().size());
}
void XMLTreeBuilder::processCDATA(const AtomicXMLToken& token)
{
exitText();
add(CDATASection::create(m_document, token.data()));
}
void XMLTreeBuilder::processComment(const AtomicXMLToken& token)
{
exitText();
add(Comment::create(m_document, token.comment()));
}
void XMLTreeBuilder::processEntity(const AtomicXMLToken& token)
{
// FIXME: we should support internal subset.
if (m_isXHTML)
processHTMLEntity(token);
else
processXMLEntity(token);
}
void XMLTreeBuilder::processNamespaces(const AtomicXMLToken& token, NodeStackItem& stackItem)
{
for (unsigned i = 0; i < token.attributes().size(); ++i) {
const Attribute& tokenAttribute = token.attributes().at(i);
if (tokenAttribute.name().prefix() == xmlnsAtom)
stackItem.setNamespaceURI(tokenAttribute.name().localName(), tokenAttribute.value());
else if (tokenAttribute.name() == xmlnsAtom)
stackItem.setNamespaceURI(tokenAttribute.value());
}
}
void XMLTreeBuilder::processAttributes(const AtomicXMLToken& token, NodeStackItem& stackItem, PassRefPtr<Element> newElement)
{
for (unsigned i = 0; i < token.attributes().size(); ++i) {
const Attribute& tokenAttribute = token.attributes().at(i);
ExceptionCode ec = 0;
if (tokenAttribute.name().prefix() == xmlnsAtom)
newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, "xmlns:" + tokenAttribute.name().localName(), tokenAttribute.value(), ec);
else if (tokenAttribute.name() == xmlnsAtom)
newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, xmlnsAtom, tokenAttribute.value(), ec);
else {
QualifiedName qName(tokenAttribute.prefix(), tokenAttribute.localName(), stackItem.namespaceForPrefix(tokenAttribute.prefix(), nullAtom));
newElement->setAttribute(qName, tokenAttribute.value());
}
if (ec) {
m_parser->stopParsing();
return;
}
}
}
void XMLTreeBuilder::processXMLEntity(const AtomicXMLToken& token)
{
DEFINE_STATIC_LOCAL(AtomicString, amp, ("amp"));
DEFINE_STATIC_LOCAL(AtomicString, apos, ("apos"));
DEFINE_STATIC_LOCAL(AtomicString, gt, ("gt"));
DEFINE_STATIC_LOCAL(AtomicString, lt, ("lt"));
DEFINE_STATIC_LOCAL(AtomicString, quot, ("quot"));
DEFINE_STATIC_LOCAL(String, ampS, ("&"));
DEFINE_STATIC_LOCAL(String, aposS, ("'"));
DEFINE_STATIC_LOCAL(String, gtS, (">"));
DEFINE_STATIC_LOCAL(String, ltS, ("<"));
DEFINE_STATIC_LOCAL(String, quotS, ("\""));
if (token.name() == amp)
appendToText(ampS.characters(), 1);
else if (token.name() == apos)
appendToText(aposS.characters(), 1);
else if (token.name() == gt)
appendToText(gtS.characters(), 1);
else if (token.name() == lt)
appendToText(ltS.characters(), 1);
else if (token.name() == quot)
appendToText(quotS.characters(), 1);
else
m_parser->stopParsing();
}
void XMLTreeBuilder::processHTMLEntity(const AtomicXMLToken& token)
{
HTMLEntitySearch search;
const AtomicString& name = token.name();
for (size_t i = 0; i < name.length(); ++i) {
search.advance(name[i]);
if (!search.isEntityPrefix()) {
m_parser->stopParsing();
return;
}
}
search.advance(';');
if (!search.isEntityPrefix()) {
m_parser->stopParsing();
return;
}
UChar32 entityValue = search.mostRecentMatch()->firstValue;
// FIXME: We need to account for secondValue if any XML entities are longer
// than one unicode character.
ASSERT_NOT_REACHED();
// Darin Adler writes:
// You can see given the code above that this else is dead code. This code is in a strange state.
// And the reinterpret_cast to UChar* makes the code little-endian-specific. That is not good!
if (entityValue <= 0xFFFF)
appendToText(reinterpret_cast<UChar*>(&entityValue), 1);
else {
UChar utf16Pair[2] = { U16_LEAD(entityValue), U16_TRAIL(entityValue) };
appendToText(utf16Pair, 2);
}
}
inline void XMLTreeBuilder::add(PassRefPtr<Node> node)
{
m_currentNodeStack.last().node()->parserAddChild(node.get());
if (!node->attached())
node->attach();
}
void XMLTreeBuilder::appendToText(const UChar* text, size_t length)
{
enterText();
if (!m_leafText)
return;
m_leafText->append(text, length);
}
void XMLTreeBuilder::enterText()
{
if (!m_sawFirstElement) {
// FIXME: Guarantee the text is only whitespace.
return;
}
if (!m_leafText)
m_leafText = adoptPtr(new StringBuilder());
}
void XMLTreeBuilder::exitText()
{
if (!m_leafText.get())
return;
add(Text::create(m_document, m_leafText->toString()));
m_leafText.clear();
}
bool XMLTreeBuilder::failOnText()
{
if (!m_leafText)
return true;
// FIXME: Guarantee the text is only whitespace.
m_leafText.clear();
return true;
}
XMLTreeBuilder::NodeStackItem::NodeStackItem(PassRefPtr<ContainerNode> n, NodeStackItem* parent)
: m_node(n)
{
if (!parent) {
m_scopedNamespaces.set(xmlAtom, XMLNames::xmlNamespaceURI);
return;
}
m_namespace = parent->m_namespace;
m_scopedNamespaces = parent->m_scopedNamespaces;
}
bool XMLTreeBuilder::NodeStackItem::hasNamespaceURI(AtomicString prefix)
{
ASSERT(!prefix.isNull());
return m_scopedNamespaces.contains(prefix);
}
AtomicString XMLTreeBuilder::NodeStackItem::namespaceURI(AtomicString prefix)
{
ASSERT(!prefix.isNull());
if (m_scopedNamespaces.contains(prefix))
return m_scopedNamespaces.get(prefix);
return nullAtom;
}
// Binds |prefix| to the namespace |uri| in this scope, replacing any binding
// the prefix already had.
void XMLTreeBuilder::NodeStackItem::setNamespaceURI(AtomicString prefix, AtomicString uri)
{
m_scopedNamespaces.set(prefix, uri);
}
AtomicString XMLTreeBuilder::NodeStackItem::namespaceForPrefix(AtomicString prefix, AtomicString fallback)
{
AtomicString uri = fallback;
if (!prefix.isNull() && hasNamespaceURI(prefix))
uri = namespaceURI(prefix);
return uri;
}
}