blob: 9e62e743ce77c190a37b0c3dd854a713fa25948a [file] [log] [blame]
/*
* Copyright (C) 2008 Apple Inc. All Rights Reserved.
* Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "XMLTokenizer.h"
#include "MarkupTokenizerInlineMethods.h"
#include "NotImplemented.h"
#include "XMLCharacterReferenceParser.h"
#include "XMLToken.h"
#include <wtf/ASCIICType.h>
#include <wtf/CurrentTime.h>
#include <wtf/UnusedParam.h>
#include <wtf/text/AtomicString.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
using namespace WTF;
namespace WebCore {
// These specializations live in a .cpp file because the linker rejects them
// when they are included more than once. There is no XMLToken.cpp, so this
// file is the next best home for them.

// Builds the QualifiedName for a tokenizer attribute. An empty prefix buffer
// becomes nullAtom; the namespace component is always nullAtom here.
template<>
QualifiedName AtomicMarkupTokenBase<XMLToken>::nameForAttribute(const XMLToken::Attribute& attribute) const
{
    const AtomicString localName(attribute.m_name.data(), attribute.m_name.size());
    if (attribute.m_prefix.isEmpty())
        return QualifiedName(nullAtom, localName, nullAtom);

    const AtomicString prefix(attribute.m_prefix.data(), attribute.m_prefix.size());
    return QualifiedName(prefix, localName, nullAtom);
}
// Reports whether the current token type is one of the kinds that carries a
// name: start tags, end tags, DOCTYPEs and entity references.
template<>
bool AtomicMarkupTokenBase<XMLToken>::usesName() const
{
    switch (m_type) {
    case XMLTokenTypes::StartTag:
    case XMLTokenTypes::EndTag:
    case XMLTokenTypes::DOCTYPE:
    case XMLTokenTypes::Entity:
        return true;
    default:
        return false;
    }
}
// Reports whether the current token type is one of the kinds treated as
// attribute-carrying: start tags and end tags.
template<>
bool AtomicMarkupTokenBase<XMLToken>::usesAttributes() const
{
    if (m_type == XMLTokenTypes::StartTag)
        return true;
    return m_type == XMLTokenTypes::EndTag;
}
namespace {

// Returns true if |cc| may begin an XML name (the NameStartChar production of
// XML 1.0), restricted to the Basic Multilingual Plane.
//
// ':' is intentionally rejected here: the tokenizer states handle the colon
// themselves so that they can split a name into prefix and local part.
//
// The checks walk the code-point space in ascending order, alternating
// between excluded gaps and accepted ranges.
inline bool isValidNameStart(UChar cc)
{
    if (cc <= 0x40)
        return false;
    if (cc <= 0x5A) // [A-Z]
        return true;
    if (cc == 0x5F) // '_'
        return true;
    if (cc <= 0x60)
        return false;
    if (cc <= 0x7A) // [a-z]
        return true;
    if (cc < 0xC0)
        return false;
    if (cc <= 0xD6) // [#xC0-#xD6]
        return true;
    if (cc == 0xD7) // U+00D7 MULTIPLICATION SIGN is the gap between the two Latin-1 ranges.
        return false;
    if (cc <= 0xF6) // [#xD8-#xF6]
        return true;
    if (cc == 0xF7) // U+00F7 DIVISION SIGN
        return false;
    if (cc <= 0x2FF) // [#xF8-#x2FF]
        return true;
    if (cc < 0x370)
        return false;
    if (cc <= 0x37D) // [#x370-#x37D]
        return true;
    if (cc == 0x37E) // U+037E GREEK QUESTION MARK
        return false;
    if (cc <= 0x1FFF) // [#x37F-#x1FFF]
        return true;
    if (cc < 0x200C)
        return false;
    if (cc <= 0x200D) // [#x200C-#x200D]
        return true;
    if (cc < 0x2070)
        return false;
    if (cc <= 0x218F) // [#x2070-#x218F]
        return true;
    if (cc < 0x2C00)
        return false;
    if (cc <= 0x2FEF) // [#x2C00-#x2FEF]
        return true;
    if (cc < 0x3001)
        return false;
    if (cc <= 0xD7FF) // [#x3001-#xD7FF]
        return true;
    if (cc < 0xF900)
        return false;
    if (cc <= 0xFDCF) // [#xF900-#xFDCF]
        return true;
    if (cc < 0xFDF0)
        return false;
    if (cc <= 0xFFFD) // [#xFDF0-#xFFFD]
        return true;
    // FIXME: support non-BMP planes
    return false;
}

// Returns true if |cc| may appear after the first character of an XML name
// (the NameChar production of XML 1.0): every NameStartChar plus '-', '.',
// ASCII digits, U+00B7, the combining marks U+0300-U+036F and U+203F-U+2040.
// As with isValidNameStart(), ':' is not accepted here.
inline bool isValidNameChar(UChar cc)
{
    if (isValidNameStart(cc))
        return true;
    if (cc == '-' || cc == '.')
        return true;
    if (cc < 0x30)
        return false;
    if (cc < 0x3A) // [0-9]
        return true;
    if (cc == 0xB7) // U+00B7 MIDDLE DOT
        return true;
    if (cc < 0x0300)
        return false;
    if (cc <= 0x036F) // [#x0300-#x036F]
        return true;
    if (cc < 0x203F)
        return false;
    if (cc <= 0x2040) // [#x203F-#x2040]
        return true;
    return false;
}

// Returns true for the restricted ASCII set accepted inside a quoted DOCTYPE
// literal: CR, LF, space, ASCII letters and digits, '_' and the punctuation
// between 0x21 and 0x40 except '"', '&', '<' and '>'. Everything at or above
// 0x7B, and the characters '[' '\' ']' '^' '`', are rejected. This is the
// same set as the PubidChar production of XML 1.0.
inline bool isValidLiteralChar(UChar cc)
{
    if (cc == 0xD || cc == 0xA)
        return true;
    if (cc < 0x20)
        return false;
    if (cc == '"' || cc == '&')
        return false;
    if (cc < 0x3C)
        return true;
    if (cc == '<' || cc == '>')
        return false;
    if (cc < 0x5B)
        return true;
    if (cc == '_')
        return true;
    if (cc <= 0x60)
        return false;
    if (cc < 0x7B)
        return true;
    return false;
}

}
// Convenience wrappers that forward to the generic BEGIN_STATE / ADVANCE_TO /
// SWITCH_TO macros (defined outside this file) with XMLTokenizerState as the
// first argument, so the state machine below only has to spell the state name.
#define XML_BEGIN_STATE(stateName) BEGIN_STATE(XMLTokenizerState, stateName)
#define XML_ADVANCE_TO(stateName) ADVANCE_TO(XMLTokenizerState, stateName)
#define XML_SWITCH_TO(stateName) SWITCH_TO(XMLTokenizerState, stateName)
// EQ_STATE(CurrentState, NextState) expands to one complete state that waits
// for an '=' sign: tokenizer whitespace stays in CurrentState, '=' advances to
// NextState, and anything else records a parse error and returns
// emitEndOfFile(source). The expansion uses |cc| and |source| from the
// enclosing function, so it is only usable inside the tokenizer's switch.
#define EQ_STATE(CurrentState, NextState) \
XML_BEGIN_STATE(CurrentState) { \
if (isTokenizerWhitespace(cc)) \
XML_ADVANCE_TO(CurrentState); \
else if (cc == '=') \
XML_ADVANCE_TO(NextState); \
else { \
parseError(); \
return emitEndOfFile(source); \
} \
} \
END_STATE()
// EQ_BEFORE_VALUE_STATES(EqualsState, BeforeValueState, ValueState) expands to
// two states: EqualsState (via EQ_STATE, leading to BeforeValueState) and
// BeforeValueState, which skips tokenizer whitespace and then requires an
// opening '"' or '\''. The quote seen is stored in m_additionalAllowedCharacter
// before advancing to ValueState; any other character records a parse error
// and returns emitEndOfFile(source).
#define EQ_BEFORE_VALUE_STATES(EqualsState, BeforeValueState, ValueState) \
EQ_STATE(EqualsState, BeforeValueState) \
XML_BEGIN_STATE(BeforeValueState) { \
if (isTokenizerWhitespace(cc)) \
XML_ADVANCE_TO(BeforeValueState); \
else if (cc == '"' || cc == '\'') { \
m_additionalAllowedCharacter = cc; \
XML_ADVANCE_TO(ValueState); \
} else { \
parseError(); \
return emitEndOfFile(source); \
} \
} \
END_STATE()
// Constructs a tokenizer; all initial state is established by reset().
XMLTokenizer::XMLTokenizer()
{
    this->reset();
}
// The XML specialization always answers "no" when asked whether null
// characters should be skipped.
template<>
inline bool MarkupTokenizerBase<XMLToken, XMLTokenizerState>::shouldSkipNullCharacters() const
{
    const bool skipNullCharacters = false;
    return skipNullCharacters;
}
// Runs the XML tokenizer state machine over |source|, filling in |token|.
//
// The function resumes in m_state and examines one input character (|cc|) per
// state visit. Every exit goes through one of the helper calls
// (haveBufferedCharacterToken(), emitAndResumeIn(), emitAndReconsumeIn(),
// emitEndOfFile()) and returns its result, except for the DataState case that
// returns true to flush lazily buffered character data ahead of a '<'.
// NOTE(review): presumably a true return means |token| is ready for the
// caller; confirm against the helpers in the tokenizer base class.
//
// Malformed input is handled uniformly: parseError() records the failure and
// the function returns emitEndOfFile(source).
//
// The state bodies rely on XML_ADVANCE_TO / XML_SWITCH_TO transferring control
// to the named state, so statements following one of those macros are only
// reached when the guarding condition was false. NOTE(review): ADVANCE_TO
// presumably consumes the current character first while SWITCH_TO re-examines
// it; confirm in MarkupTokenizerInlineMethods.h.
bool XMLTokenizer::nextToken(SegmentedString& source, XMLToken& token)
{
    // If we have a token in progress, then we're supposed to be called back
    // with the same token so we can finish it.
    ASSERT(!m_token || m_token == &token || token.type() == XMLTokenTypes::Uninitialized);
    m_token = &token;

    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
        return haveBufferedCharacterToken();
    UChar cc = m_inputStreamPreprocessor.nextInputCharacter();

    switch (m_state) {
    // ---- Character data and references -------------------------------------
    XML_BEGIN_STATE(DataState) {
        if (cc == '&')
            XML_ADVANCE_TO(CharacterReferenceStartState);
        else if (cc == '<') {
            if (m_token->type() == XMLTokenTypes::Character) {
                // We have a bunch of character tokens queued up that we
                // are emitting lazily here.
                return true;
            }
            XML_ADVANCE_TO(TagOpenState);
        } else if (cc == InputStreamPreprocessor::endOfFileMarker)
            return emitEndOfFile(source);
        else {
            bufferCharacter(cc);
            XML_ADVANCE_TO(DataState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(CharacterReferenceStartState) {
        if (cc == '#') {
            bool notEnoughCharacters = false;
            StringBuilder decodedCharacter;
            if (consumeXMLCharacterReference(source, decodedCharacter, notEnoughCharacters)) {
                for (unsigned i = 0; i < decodedCharacter.length(); ++i)
                    bufferCharacter(decodedCharacter[i]);
                XML_SWITCH_TO(DataState);
            } else if (notEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (isValidNameStart(cc)) {
            // Pending character data has to be emitted before the entity
            // token can be started.
            if (m_token->type() == XMLTokenTypes::Character)
                return emitAndReconsumeIn(source, XMLTokenizerState::CharacterReferenceStartState);
            m_token->beginEntity(cc);
            XML_ADVANCE_TO(EntityReferenceState);
        }
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    XML_BEGIN_STATE(EntityReferenceState) {
        if (isValidNameChar(cc)) {
            m_token->appendToName(cc);
            XML_ADVANCE_TO(EntityReferenceState);
        } else if (cc == ';')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    // ---- Tags --------------------------------------------------------------
    XML_BEGIN_STATE(TagOpenState) {
        if (cc == '!')
            XML_ADVANCE_TO(MarkupDeclarationOpenState);
        else if (cc == '/')
            XML_ADVANCE_TO(EndTagOpenState);
        else if (isValidNameStart(cc)) {
            m_token->beginStartTag(cc);
            XML_ADVANCE_TO(TagNameState);
        } else if (cc == '?')
            XML_ADVANCE_TO(ProcessingInstructionTargetStartState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(EndTagOpenState) {
        if (isValidNameStart(cc)) {
            m_token->beginEndTag(cc);
            XML_ADVANCE_TO(EndTagNameState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(TagNameState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeAttributeNameState);
        else if (cc == '/')
            XML_ADVANCE_TO(SelfClosingStartTagState);
        else if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else if (isValidNameChar(cc)) {
            m_token->appendToName(cc);
            XML_ADVANCE_TO(TagNameState);
        } else if (cc == ':' && !m_token->hasPrefix()) {
            // Only the first ':' splits prefix from local name.
            m_token->endPrefix();
            XML_ADVANCE_TO(TagNameState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(EndTagNameState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(EndTagSpaceState);
        else if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else if (isValidNameChar(cc)) {
            m_token->appendToName(cc);
            XML_ADVANCE_TO(EndTagNameState);
        } else if (cc == ':' && !m_token->hasPrefix()) {
            m_token->endPrefix();
            XML_ADVANCE_TO(EndTagNameState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(EndTagSpaceState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(EndTagSpaceState);
        else if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    // ---- Attributes --------------------------------------------------------
    XML_BEGIN_STATE(BeforeAttributeNameState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeAttributeNameState);
        else if (cc == '/')
            XML_ADVANCE_TO(SelfClosingStartTagState);
        else if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else if (isValidNameStart(cc)) {
            m_token->addNewAttribute();
            m_token->beginAttributeName(source.numberOfCharactersConsumed());
            m_token->appendToAttributeName(cc);
            XML_ADVANCE_TO(AttributeNameState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(AttributeNameState) {
        if (isTokenizerWhitespace(cc)) {
            m_token->endAttributeName(source.numberOfCharactersConsumed());
            XML_ADVANCE_TO(AfterAttributeNameState);
        } else if (cc == '=') {
            m_token->endAttributeName(source.numberOfCharactersConsumed());
            XML_ADVANCE_TO(BeforeAttributeValueState);
        } else if (isValidNameChar(cc)) {
            m_token->appendToAttributeName(cc);
            XML_ADVANCE_TO(AttributeNameState);
        } else if (cc == ':' && !m_token->attributeHasPrefix()) {
            m_token->endAttributePrefix(source.numberOfCharactersConsumed());
            XML_ADVANCE_TO(AttributeNameState);
        } else {
            parseError();
            m_token->endAttributeName(source.numberOfCharactersConsumed());
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    EQ_STATE(AfterAttributeNameState, BeforeAttributeValueState)

    XML_BEGIN_STATE(BeforeAttributeValueState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeAttributeValueState);
        else if (cc == '"' || cc == '\'') {
            // The value range starts after the opening quote, hence the + 1.
            m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
            m_additionalAllowedCharacter = cc;
            XML_ADVANCE_TO(AttributeValueQuotedState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(AttributeValueQuotedState) {
        if (cc == m_additionalAllowedCharacter) {
            m_token->endAttributeValue(source.numberOfCharactersConsumed());
            XML_ADVANCE_TO(AfterAttributeValueQuotedState);
        } else if (cc == '&')
            XML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
        else if (cc == '<' || cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            m_token->endAttributeValue(source.numberOfCharactersConsumed());
            return emitEndOfFile(source);
        } else {
            m_token->appendToAttributeValue(cc);
            XML_ADVANCE_TO(AttributeValueQuotedState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
        if (cc == '#') {
            bool notEnoughCharacters = false;
            StringBuilder decodedCharacter;
            source.push(cc);
            if (consumeXMLCharacterReference(source, decodedCharacter, notEnoughCharacters)) {
                for (unsigned i = 0; i < decodedCharacter.length(); ++i)
                    m_token->appendToAttributeValue(decodedCharacter[i]);
                XML_ADVANCE_TO(AttributeValueQuotedState);
            } else if (notEnoughCharacters)
                return haveBufferedCharacterToken();
            else {
                parseError();
                return emitEndOfFile(source);
            }
        } else {
            // Not a numeric reference: keep the '&' and the character as-is.
            m_token->appendToAttributeValue('&');
            m_token->appendToAttributeValue(cc);
            XML_ADVANCE_TO(AttributeValueQuotedState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(AfterAttributeValueQuotedState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeAttributeNameState);
        else if (cc == '/')
            XML_ADVANCE_TO(SelfClosingStartTagState);
        else if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(SelfClosingStartTagState) {
        if (cc == '>') {
            m_token->setSelfClosing();
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        }
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    // ---- Processing instructions and the XML declaration -------------------
    XML_BEGIN_STATE(ProcessingInstructionTargetStartState) {
        DEFINE_STATIC_LOCAL(String, xmlString, ("xml"));
        // FIXME: this probably shouldn't be case-insensitive, but I don't know if people try capitalizing it ever.
        if (cc == 'x' || cc == 'X') {
            SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(xmlString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERTIgnoringCase(source, "xml");
                XML_SWITCH_TO(XMLDeclAfterXMLState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        }
        if (m_token->type() == XMLTokenTypes::ProcessingInstruction && isValidNameChar(cc))
            m_token->appendToProcessingInstructionTarget(cc);
        else if (isValidNameStart(cc))
            m_token->beginProcessingInstruction(cc);
        else {
            parseError();
            return emitEndOfFile(source);
        }
        XML_ADVANCE_TO(ProcessingInstructionTargetState);
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclAfterXMLState) {
        if (isTokenizerWhitespace(cc)) {
            m_token->beginXMLDeclaration();
            XML_ADVANCE_TO(XMLDeclBeforeVersionNameState);
        } else if (isValidNameChar(cc)) {
            // "xml" was only the start of a longer target, so this is an
            // ordinary processing instruction after all.
            m_token->beginProcessingInstruction('x');
            m_token->appendToProcessingInstructionTarget('m');
            m_token->appendToProcessingInstructionTarget('l');
            m_token->appendToProcessingInstructionTarget(cc);
            XML_ADVANCE_TO(ProcessingInstructionTargetState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclBeforeVersionNameState) {
        DEFINE_STATIC_LOCAL(String, versionString, ("version"));
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(XMLDeclBeforeVersionNameState);
        else {
            SegmentedString::LookAheadResult result = source.lookAhead(versionString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERT(source, "version");
                XML_SWITCH_TO(XMLDeclAfterVersionNameState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        }
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    EQ_BEFORE_VALUE_STATES(XMLDeclAfterVersionNameState, XMLDeclBeforeVersionValueState, XMLDeclBeforeVersionOnePointState)

    XML_BEGIN_STATE(XMLDeclBeforeVersionOnePointState) {
        DEFINE_STATIC_LOCAL(String, onePointString, ("1."));
        SegmentedString::LookAheadResult result = source.lookAhead(onePointString);
        if (result == SegmentedString::DidMatch) {
            source.advanceAndASSERT('1');
            source.advanceAndASSERT('.');
            m_token->appendToXMLVersion('1');
            m_token->appendToXMLVersion('.');
            XML_SWITCH_TO(XMLDeclVersionValueQuotedState);
        } else if (result == SegmentedString::NotEnoughCharacters)
            return haveBufferedCharacterToken();
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclVersionValueQuotedState) {
        if (cc == m_additionalAllowedCharacter) {
            XML_ADVANCE_TO(XMLDeclAfterVersionState);
        } else if (isASCIIDigit(cc)) {
            m_token->appendToXMLVersion(cc);
            XML_ADVANCE_TO(XMLDeclVersionValueQuotedState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclAfterVersionState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(XMLDeclBeforeEncodingNameState);
        else if (cc == '?')
            XML_ADVANCE_TO(XMLDeclCloseState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclBeforeEncodingNameState) {
        DEFINE_STATIC_LOCAL(String, encodingString, ("encoding"));
        DEFINE_STATIC_LOCAL(String, standaloneString, ("standalone"));
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(XMLDeclBeforeEncodingNameState);
        else if (cc == 'e') {
            SegmentedString::LookAheadResult result = source.lookAhead(encodingString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERT(source, "encoding");
                XML_SWITCH_TO(XMLDeclAfterEncodingNameState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (cc == 's') {
            SegmentedString::LookAheadResult result = source.lookAhead(standaloneString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERT(source, "standalone");
                XML_SWITCH_TO(XMLDeclAfterStandaloneNameState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (cc == '?')
            XML_ADVANCE_TO(XMLDeclCloseState);
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    EQ_BEFORE_VALUE_STATES(XMLDeclAfterEncodingNameState, XMLDeclBeforeEncodingValueState, XMLDeclEncodingValueStartQuotedState)

    XML_BEGIN_STATE(XMLDeclEncodingValueStartQuotedState) {
        if (isASCIIAlpha(cc)) {
            m_token->beginXMLEncoding(cc);
            XML_ADVANCE_TO(XMLDeclEncodingValueQuotedState);
        }
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclEncodingValueQuotedState) {
        if (cc == m_additionalAllowedCharacter) {
            XML_ADVANCE_TO(XMLDeclAfterEncodingState);
        } else if (isASCIIAlphanumeric(cc) || cc == '-') {
            m_token->appendToXMLEncoding(cc);
            XML_ADVANCE_TO(XMLDeclEncodingValueQuotedState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclAfterEncodingState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(XMLDeclBeforeStandaloneNameState);
        else if (cc == '?')
            XML_ADVANCE_TO(XMLDeclCloseState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclBeforeStandaloneNameState) {
        DEFINE_STATIC_LOCAL(String, standaloneString, ("standalone"));
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(XMLDeclBeforeStandaloneNameState);
        else if (cc == 's') {
            SegmentedString::LookAheadResult result = source.lookAhead(standaloneString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERT(source, "standalone");
                XML_SWITCH_TO(XMLDeclAfterStandaloneNameState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (cc == '?')
            XML_ADVANCE_TO(XMLDeclCloseState);
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    EQ_BEFORE_VALUE_STATES(XMLDeclAfterStandaloneNameState, XMLDeclBeforeStandaloneValueState, XMLDeclStandaloneValueQuotedState)

    XML_BEGIN_STATE(XMLDeclStandaloneValueQuotedState) {
        // The value and its closing quote are matched in one lookahead. The
        // closing quote has to be the same character that opened the value
        // (recorded in m_additionalAllowedCharacter by the preceding state),
        // so there is one candidate string per quote style.
        DEFINE_STATIC_LOCAL(String, yesDoubleQuotedString, ("yes\""));
        DEFINE_STATIC_LOCAL(String, yesSingleQuotedString, ("yes'"));
        DEFINE_STATIC_LOCAL(String, noDoubleQuotedString, ("no\""));
        DEFINE_STATIC_LOCAL(String, noSingleQuotedString, ("no'"));
        const bool isSingleQuoted = m_additionalAllowedCharacter == '\'';
        if (cc == 'y') {
            SegmentedString::LookAheadResult result = source.lookAhead(isSingleQuoted ? yesSingleQuotedString : yesDoubleQuotedString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERT(source, isSingleQuoted ? "yes'" : "yes\"");
                m_token->setXMLStandalone(true);
                XML_SWITCH_TO(XMLDeclAfterStandaloneState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (cc == 'n') {
            SegmentedString::LookAheadResult result = source.lookAhead(isSingleQuoted ? noSingleQuotedString : noDoubleQuotedString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERT(source, isSingleQuoted ? "no'" : "no\"");
                m_token->setXMLStandalone(false);
                XML_SWITCH_TO(XMLDeclAfterStandaloneState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        }
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclAfterStandaloneState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(XMLDeclAfterStandaloneState);
        if (cc == '?')
            XML_ADVANCE_TO(XMLDeclCloseState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(XMLDeclCloseState) {
        if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    XML_BEGIN_STATE(ProcessingInstructionTargetState) {
        if (isTokenizerWhitespace(cc)) {
            XML_ADVANCE_TO(ProcessingInstructionAfterTargetState);
        } else if (isValidNameChar(cc)) {
            m_token->appendToProcessingInstructionTarget(cc);
            XML_ADVANCE_TO(ProcessingInstructionTargetState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(ProcessingInstructionAfterTargetState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(ProcessingInstructionAfterTargetState);
        else if (cc == '?')
            XML_ADVANCE_TO(ProcessingInstructionCloseState);
        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            return emitEndOfFile(source);
        } else {
            m_token->appendToProcessingInstructionData(cc);
            XML_ADVANCE_TO(ProcessingInstructionDataState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(ProcessingInstructionDataState) {
        if (cc == '?')
            XML_ADVANCE_TO(ProcessingInstructionCloseState);
        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            return emitEndOfFile(source);
        } else {
            m_token->appendToProcessingInstructionData(cc);
            XML_ADVANCE_TO(ProcessingInstructionDataState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(ProcessingInstructionCloseState) {
        if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        if (cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            return emitEndOfFile(source);
        }
        // The '?' was not the start of "?>", so it belongs to the data.
        m_token->appendToProcessingInstructionData('?');
        m_token->appendToProcessingInstructionData(cc);
        XML_ADVANCE_TO(ProcessingInstructionDataState);
    }
    END_STATE()

    // ---- Comments, DOCTYPE and CDATA ---------------------------------------
    XML_BEGIN_STATE(MarkupDeclarationOpenState) {
        DEFINE_STATIC_LOCAL(String, dashDashString, ("--"));
        DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype"));
        DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA["));
        if (cc == '-') {
            SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
            if (result == SegmentedString::DidMatch) {
                source.advanceAndASSERT('-');
                source.advanceAndASSERT('-');
                m_token->beginComment();
                XML_SWITCH_TO(CommentState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (cc == 'D' || cc == 'd') {
            SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERTIgnoringCase(source, "doctype");
                XML_SWITCH_TO(BeforeDOCTYPENameState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (cc == '[') {
            SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERT(source, "[CDATA[");
                m_token->beginCDATA();
                XML_SWITCH_TO(CDATASectionState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        }
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    XML_BEGIN_STATE(CommentState) {
        if (cc == '-')
            XML_ADVANCE_TO(CommentDashState);
        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            return emitEndOfFile(source);
        } else {
            m_token->appendToComment(cc);
            XML_ADVANCE_TO(CommentState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(CommentDashState) {
        if (cc == '-')
            XML_ADVANCE_TO(CommentEndState);
        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            return emitEndOfFile(source);
        } else {
            // A lone '-' is ordinary comment text.
            m_token->appendToComment('-');
            m_token->appendToComment(cc);
            XML_ADVANCE_TO(CommentState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(CommentEndState) {
        if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        if (cc == '-') {
            parseError();
            return emitEndOfFile(source);
        }
        parseError();
        return emitAndReconsumeIn(source, XMLTokenizerState::DataState);
    }
    END_STATE()

    XML_BEGIN_STATE(BeforeDOCTYPENameState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeDOCTYPENameState);
        else if (isValidNameStart(cc)) {
            m_token->beginDOCTYPE(cc);
            XML_ADVANCE_TO(DOCTYPENameState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(DOCTYPENameState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(AfterDOCTYPENameState);
        else if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else if (isValidNameChar(cc)) {
            m_token->appendToName(cc);
            XML_ADVANCE_TO(DOCTYPENameState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(AfterDOCTYPENameState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(AfterDOCTYPENameState);
        if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        if (cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            return emitEndOfFile(source);
        }
        DEFINE_STATIC_LOCAL(String, publicString, ("public"));
        DEFINE_STATIC_LOCAL(String, systemString, ("system"));
        if (cc == 'P' || cc == 'p') {
            SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERTIgnoringCase(source, "public");
                XML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (cc == 'S' || cc == 's') {
            SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERTIgnoringCase(source, "system");
                XML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
            } else if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        } else if (cc == '[')
            XML_ADVANCE_TO(BeforeDOCTYPEInternalSubsetState);
        // Reached for any unexpected character and also when a keyword
        // lookahead definitively failed (e.g. "pub" followed by something
        // other than "lic"), matching how the other lookahead states report
        // a mismatch.
        parseError();
        return emitEndOfFile(source);
    }
    END_STATE()

    XML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
        else if (cc == '"' || cc == '\'') {
            m_token->setPublicIdentifierToEmptyString();
            m_additionalAllowedCharacter = cc;
            XML_ADVANCE_TO(DOCTYPEPublicIdentifierQuotedState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(DOCTYPEPublicIdentifierQuotedState) {
        // Public identifiers are limited to the restricted character set
        // checked by isValidLiteralChar() (XML's PubidChar).
        if (cc == m_additionalAllowedCharacter)
            XML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
        else if (cc == InputStreamPreprocessor::endOfFileMarker || !isValidLiteralChar(cc)) {
            parseError();
            return emitEndOfFile(source);
        } else {
            m_token->appendToPublicIdentifier(cc);
            XML_ADVANCE_TO(DOCTYPEPublicIdentifierQuotedState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
        else if (cc == '"' || cc == '\'') {
            m_token->setSystemIdentifierToEmptyString();
            m_additionalAllowedCharacter = cc;
            XML_ADVANCE_TO(DOCTYPESystemIdentifierQuotedState);
        } else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(DOCTYPESystemIdentifierQuotedState) {
        // A system literal may contain any character other than its own
        // closing quote (XML's SystemLiteral), so only end of input is an
        // error here; this lets URLs containing e.g. '~' or '&' through.
        if (cc == m_additionalAllowedCharacter)
            XML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
        else if (cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            return emitEndOfFile(source);
        } else {
            m_token->appendToSystemIdentifier(cc);
            XML_ADVANCE_TO(DOCTYPESystemIdentifierQuotedState);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
        else if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else if (cc == '[')
            XML_ADVANCE_TO(BeforeDOCTYPEInternalSubsetState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(BeforeDOCTYPEInternalSubsetState) {
        if (cc == ']')
            XML_ADVANCE_TO(AfterDOCTYPEInternalSubsetState);
        else {
            // FIXME implement internal subset
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(AfterDOCTYPEInternalSubsetState) {
        if (isTokenizerWhitespace(cc))
            XML_ADVANCE_TO(AfterDOCTYPEInternalSubsetState);
        else if (cc == '>')
            return emitAndResumeIn(source, XMLTokenizerState::DataState);
        else {
            parseError();
            return emitEndOfFile(source);
        }
    }
    END_STATE()

    XML_BEGIN_STATE(CDATASectionState) {
        DEFINE_STATIC_LOCAL(String, closeString, ("]]>"));
        if (cc == ']') {
            SegmentedString::LookAheadResult result = source.lookAhead(closeString);
            if (result == SegmentedString::DidMatch) {
                advanceStringAndASSERT(source, "]]>");
                return emitAndReconsumeIn(source, XMLTokenizerState::DataState);
            }
            if (result == SegmentedString::NotEnoughCharacters)
                return haveBufferedCharacterToken();
        }
        if (cc == InputStreamPreprocessor::endOfFileMarker) {
            parseError();
            return emitEndOfFile(source);
        }
        m_token->appendToCDATA(cc);
        XML_ADVANCE_TO(CDATASectionState);
    }
    END_STATE()

    }

    ASSERT_NOT_REACHED();
    return false;
}
// Appends |character| to the character token being accumulated in m_token,
// first making sure the token is a character token. The end-of-file marker
// must never be buffered.
inline void XMLTokenizer::bufferCharacter(UChar character)
{
    ASSERT(!(character == InputStreamPreprocessor::endOfFileMarker));

    XMLToken& pendingToken = *m_token;
    pendingToken.ensureIsCharacterToken();
    pendingToken.appendToCharacter(character);
}
inline void XMLTokenizer::parseError()
{
m_errorDuringParsing = true;
}
}