// blob: f89a54ca77e0780d38479c9e7eadc5ee0e0aff38 [file] [log] [blame]
/*
* Copyright (C) 2013 Google, Inc. All Rights Reserved.
* Copyright (C) 2015-2021 Apple Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "Attribute.h"
namespace WebCore {
// DOCTYPE-specific state of an HTMLToken. HTMLToken owns one through a
// std::unique_ptr, creates it in beginDOCTYPE() and hands it out through
// releaseDoctypeData().
struct DoctypeData {
WTF_MAKE_FAST_ALLOCATED;
public:
// Characters added by HTMLToken::appendToPublicIdentifier().
Vector<UChar> publicIdentifier;
// Characters added by HTMLToken::appendToSystemIdentifier().
Vector<UChar> systemIdentifier;
// Set by HTMLToken::setPublicIdentifierToEmptyString(); distinguishes an
// empty identifier from one that was never started.
bool hasPublicIdentifier { false };
// Set by HTMLToken::setSystemIdentifierToEmptyString().
bool hasSystemIdentifier { false };
// Set by HTMLToken::setForceQuirks().
bool forceQuirks { false };
};
// A single token that is built up incrementally, mostly one character at a
// time. An instance is reusable: clear() returns it to Type::Uninitialized,
// after which a begin*() call, makeEndOfFile() or appendToCharacter() gives it
// its next type. Accessors and mutators assert that the current type is one
// they apply to.
class HTMLToken {
    WTF_MAKE_FAST_ALLOCATED;
public:
    enum class Type : uint8_t {
        Uninitialized,
        DOCTYPE,
        StartTag,
        EndTag,
        Comment,
        Character,
        EndOfFile,
    };

    // Name/value pair of one tag attribute, stored as UTF-16 code units.
    struct Attribute {
        Vector<UChar, 32> name;
        Vector<UChar, 64> value;
    };

    using AttributeList = Vector<Attribute, 10>;
    using DataVector = Vector<UChar, 256>;

    HTMLToken() = default;

    // Resets the type, the data buffer and the 8-bit check accumulator.
    void clear();

    Type type() const;

    // EndOfFile
    void makeEndOfFile();

    // StartTag, EndTag, DOCTYPE.
    const DataVector& name() const;
    void appendToName(UChar);

    // DOCTYPE.
    void beginDOCTYPE();
    void beginDOCTYPE(UChar);
    void setForceQuirks();
    void setPublicIdentifierToEmptyString();
    void setSystemIdentifierToEmptyString();
    void appendToPublicIdentifier(UChar);
    void appendToSystemIdentifier(UChar);
    // Transfers ownership of the DOCTYPE data to the caller.
    std::unique_ptr<DoctypeData> releaseDoctypeData();

    // StartTag, EndTag.
    bool selfClosing() const;
    const AttributeList& attributes() const;
    void beginStartTag(LChar);
    void beginEndTag(LChar);
    void beginEndTag(const Vector<LChar, 32>&);
    void beginAttribute();
    void appendToAttributeName(UChar);
    void appendToAttributeValue(UChar);
    void appendToAttributeValue(unsigned index, StringView value);
    template<typename CharacterType> void appendToAttributeValue(Span<const CharacterType>);
    void endAttribute();
    void setSelfClosing();

    // Character.
    // Starting a character token works slightly differently than starting
    // other types of tokens because we want to save a per-character branch.
    // There is no beginCharacters, and appending a character sets the type.
    const DataVector& characters() const;
    bool charactersIsAll8BitData() const;
    void appendToCharacter(LChar);
    void appendToCharacter(UChar);
    void appendToCharacter(const Vector<LChar, 32>&);
    template<typename CharacterType> void appendToCharacter(Span<const CharacterType>);

    // Comment.
    const DataVector& comment() const;
    bool commentIsAll8BitData() const;
    void beginComment();
    void appendToComment(char);
    void appendToComment(ASCIILiteral);
    void appendToComment(UChar);

private:
    // Shared buffer: holds the tag/DOCTYPE name, the character data or the
    // comment text, depending on m_type.
    DataVector m_data;
    // Bitwise OR of the UChar values appended to m_data. A result <= 0xFF
    // means none of them needed more than 8 bits.
    UChar m_data8BitCheck { 0 };
    Type m_type { Type::Uninitialized };

    // For StartTag and EndTag
    // Both members are reset by beginStartTag()/beginEndTag(); the default
    // initializers only guarantee that a freshly constructed token never
    // holds indeterminate values.
    bool m_selfClosing { false };
    AttributeList m_attributes;
    Attribute* m_currentAttribute { nullptr };

    // For DOCTYPE
    std::unique_ptr<DoctypeData> m_doctypeData;
};
// Declaration only; no definition with this exact signature is visible in this header.
// NOTE(review): the inline findAttribute() defined near the end of this header takes
// (const HTMLToken::AttributeList&, Span<const UChar>) instead - confirm that this
// overload is still defined and used elsewhere.
const HTMLToken::Attribute* findAttribute(const Vector<HTMLToken::Attribute>&, StringView name);
// Makes the token reusable: empties the shared data buffer, resets the 8-bit
// check accumulator and marks the token as Uninitialized. Attributes, the
// self-closing flag and the DOCTYPE data are not touched here.
inline void HTMLToken::clear()
{
    m_data8BitCheck = 0;
    m_data.clear();
    m_type = Type::Uninitialized;
}
// Returns the current token type (Type::Uninitialized after construction or clear()).
inline HTMLToken::Type HTMLToken::type() const
{
return m_type;
}
// Turns an uninitialized token into an EndOfFile token.
inline void HTMLToken::makeEndOfFile()
{
// The token must not have been started as anything else.
ASSERT(m_type == Type::Uninitialized);
m_type = Type::EndOfFile;
}
// Returns the name accumulated so far. Valid only for StartTag, EndTag and
// DOCTYPE tokens; the name lives in the shared data buffer.
inline const HTMLToken::DataVector& HTMLToken::name() const
{
ASSERT(m_type == Type::StartTag || m_type == Type::EndTag || m_type == Type::DOCTYPE);
return m_data;
}
// Appends one non-null character to the name of a StartTag, EndTag or DOCTYPE
// token and folds it into the 8-bit check accumulator.
inline void HTMLToken::appendToName(UChar character)
{
    ASSERT(m_type == Type::StartTag || m_type == Type::EndTag || m_type == Type::DOCTYPE);
    ASSERT(character);
    m_data8BitCheck |= character;
    m_data.append(character);
}
// Sets the force-quirks flag in the DOCTYPE data. Only valid for a DOCTYPE
// token, whose data is allocated by beginDOCTYPE().
inline void HTMLToken::setForceQuirks()
{
    ASSERT(m_type == Type::DOCTYPE);
    auto& doctype = *m_doctypeData;
    doctype.forceQuirks = true;
}
// Starts a DOCTYPE token with an empty name: allocates fresh DoctypeData and
// switches the type from Uninitialized to DOCTYPE.
inline void HTMLToken::beginDOCTYPE()
{
    ASSERT(m_type == Type::Uninitialized);
    m_doctypeData = makeUnique<DoctypeData>();
    m_type = Type::DOCTYPE;
}
// Starts a DOCTYPE token whose name begins with the given non-null character.
inline void HTMLToken::beginDOCTYPE(UChar character)
{
    ASSERT(character);
    beginDOCTYPE();
    // Record the first name character in both the buffer and the 8-bit check.
    m_data8BitCheck |= character;
    m_data.append(character);
}
// Marks the DOCTYPE as having a public identifier and makes that identifier empty.
inline void HTMLToken::setPublicIdentifierToEmptyString()
{
    ASSERT(m_type == Type::DOCTYPE);
    auto& doctype = *m_doctypeData;
    doctype.publicIdentifier.clear();
    doctype.hasPublicIdentifier = true;
}
// Marks the DOCTYPE as having a system identifier and makes that identifier empty.
inline void HTMLToken::setSystemIdentifierToEmptyString()
{
    ASSERT(m_type == Type::DOCTYPE);
    auto& doctype = *m_doctypeData;
    doctype.systemIdentifier.clear();
    doctype.hasSystemIdentifier = true;
}
// Appends one non-null character to the DOCTYPE public identifier. The
// identifier must already have been started with setPublicIdentifierToEmptyString().
inline void HTMLToken::appendToPublicIdentifier(UChar character)
{
    ASSERT(character);
    ASSERT(m_type == Type::DOCTYPE);
    ASSERT(m_doctypeData->hasPublicIdentifier);
    auto& identifier = m_doctypeData->publicIdentifier;
    identifier.append(character);
}
// Appends one non-null character to the DOCTYPE system identifier. The
// identifier must already have been started with setSystemIdentifierToEmptyString().
inline void HTMLToken::appendToSystemIdentifier(UChar character)
{
    ASSERT(character);
    ASSERT(m_type == Type::DOCTYPE);
    ASSERT(m_doctypeData->hasSystemIdentifier);
    auto& identifier = m_doctypeData->systemIdentifier;
    identifier.append(character);
}
// Hands ownership of the DOCTYPE data to the caller; the token no longer owns
// it afterwards.
inline std::unique_ptr<DoctypeData> HTMLToken::releaseDoctypeData()
{
    auto released = WTFMove(m_doctypeData);
    return released;
}
// Returns whether setSelfClosing() has been called since the tag was begun.
// Valid only for StartTag and EndTag tokens.
inline bool HTMLToken::selfClosing() const
{
ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
return m_selfClosing;
}
// Flags the current StartTag or EndTag token as self-closing.
inline void HTMLToken::setSelfClosing()
{
ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
m_selfClosing = true;
}
// Starts a StartTag token whose name begins with the given non-null 8-bit
// character. Resets the per-tag state (attributes, self-closing flag).
inline void HTMLToken::beginStartTag(LChar character)
{
    ASSERT(character);
    ASSERT(m_type == Type::Uninitialized);
#if ASSERT_ENABLED
    // Only needed so the assertions on m_currentAttribute are meaningful.
    m_currentAttribute = nullptr;
#endif
    m_attributes.clear();
    m_selfClosing = false;
    m_type = Type::StartTag;
    m_data.append(character);
}
// Starts an EndTag token whose name begins with the given 8-bit character.
// Resets the per-tag state (attributes, self-closing flag).
inline void HTMLToken::beginEndTag(LChar character)
{
    ASSERT(m_type == Type::Uninitialized);
#if ASSERT_ENABLED
    // Only needed so the assertions on m_currentAttribute are meaningful.
    m_currentAttribute = nullptr;
#endif
    m_attributes.clear();
    m_selfClosing = false;
    m_type = Type::EndTag;
    m_data.append(character);
}
// Starts an EndTag token whose name is initialized with a whole run of 8-bit
// characters. Resets the per-tag state (attributes, self-closing flag).
inline void HTMLToken::beginEndTag(const Vector<LChar, 32>& characters)
{
    ASSERT(m_type == Type::Uninitialized);
#if ASSERT_ENABLED
    // Only needed so the assertions on m_currentAttribute are meaningful.
    m_currentAttribute = nullptr;
#endif
    m_attributes.clear();
    m_selfClosing = false;
    m_type = Type::EndTag;
    m_data.appendVector(characters);
}
// Adds a new, empty attribute to the end of the list and makes it the target
// of the following appendToAttributeName()/appendToAttributeValue() calls.
inline void HTMLToken::beginAttribute()
{
    ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
    const auto sizeWithNewAttribute = m_attributes.size() + 1;
    m_attributes.grow(sizeWithNewAttribute);
    m_currentAttribute = &m_attributes.last();
}
// Finishes the attribute started by beginAttribute(). Does nothing in builds
// without assertions.
inline void HTMLToken::endAttribute()
{
ASSERT(m_currentAttribute);
#if ASSERT_ENABLED
// Clear the pointer so a later append without beginAttribute() trips an assertion.
m_currentAttribute = nullptr;
#endif
}
// Appends one non-null character to the name of the attribute currently being built.
inline void HTMLToken::appendToAttributeName(UChar character)
{
    ASSERT(character);
    ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
    ASSERT(m_currentAttribute);
    auto& attributeName = m_currentAttribute->name;
    attributeName.append(character);
}
// Appends one non-null character to the value of the attribute currently being built.
inline void HTMLToken::appendToAttributeValue(UChar character)
{
    ASSERT(character);
    ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
    ASSERT(m_currentAttribute);
    auto& attributeValue = m_currentAttribute->value;
    attributeValue.append(character);
}
// Appends a whole run of characters to the value of the attribute currently being built.
template<typename CharacterType>
inline void HTMLToken::appendToAttributeValue(Span<const CharacterType> characters)
{
    ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
    ASSERT(m_currentAttribute);
    auto& attributeValue = m_currentAttribute->value;
    attributeValue.append(characters);
}
// Appends a non-empty string to the value of the attribute at index i,
// independent of which attribute is currently being built.
inline void HTMLToken::appendToAttributeValue(unsigned i, StringView value)
{
    ASSERT(!value.isEmpty());
    ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
    auto& attributeValue = m_attributes[i].value;
    append(attributeValue, value);
}
// Returns the attributes collected so far. Valid only for StartTag and EndTag tokens.
inline const HTMLToken::AttributeList& HTMLToken::attributes() const
{
ASSERT(m_type == Type::StartTag || m_type == Type::EndTag);
return m_attributes;
}
// Returns the character data accumulated so far. Valid only for Character
// tokens; the data lives in the shared data buffer.
inline const HTMLToken::DataVector& HTMLToken::characters() const
{
ASSERT(m_type == Type::Character);
return m_data;
}
// Reports whether every character recorded in the 8-bit check accumulator
// fits in 8 bits. Valid only for Character tokens.
inline bool HTMLToken::charactersIsAll8BitData() const
{
    ASSERT(m_type == Type::Character);
    const bool sawWideCharacter = m_data8BitCheck > 0xFF;
    return !sawWideCharacter;
}
// Appends one 8-bit character and, if the token was still uninitialized,
// makes it a Character token. The 8-bit check is left alone because an LChar
// cannot exceed 0xFF.
inline void HTMLToken::appendToCharacter(LChar character)
{
    ASSERT(m_type == Type::Uninitialized || m_type == Type::Character);
    m_data.append(character);
    m_type = Type::Character;
}
// Appends one 16-bit character, folds it into the 8-bit check accumulator
// and, if the token was still uninitialized, makes it a Character token.
inline void HTMLToken::appendToCharacter(UChar character)
{
    ASSERT(m_type == Type::Uninitialized || m_type == Type::Character);
    m_data8BitCheck |= character;
    m_data.append(character);
    m_type = Type::Character;
}
// Appends a run of 8-bit characters and, if the token was still
// uninitialized, makes it a Character token.
inline void HTMLToken::appendToCharacter(const Vector<LChar, 32>& characters)
{
    ASSERT(m_type == Type::Uninitialized || m_type == Type::Character);
    m_data.appendVector(characters);
    m_type = Type::Character;
}
// Appends a run of characters and, if the token was still uninitialized,
// makes it a Character token. For UChar input the characters are also folded
// into the 8-bit check accumulator.
template<typename CharacterType>
inline void HTMLToken::appendToCharacter(Span<const CharacterType> characters)
{
    // Same precondition as the other appendToCharacter() overloads.
    ASSERT(m_type == Type::Uninitialized || m_type == Type::Character);
    // The type must be set before charactersIsAll8BitData() is called below,
    // because that accessor asserts on it.
    m_type = Type::Character;
    m_data.append(characters);
    if constexpr (std::is_same_v<CharacterType, UChar>) {
        // Once a character above 0xFF has been recorded the answer cannot
        // change any more, so skip the scan.
        if (!charactersIsAll8BitData())
            return;
        for (auto character : characters)
            m_data8BitCheck |= character;
    }
}
// Returns the comment text accumulated so far. Valid only for Comment tokens;
// the text lives in the shared data buffer.
inline const HTMLToken::DataVector& HTMLToken::comment() const
{
ASSERT(m_type == Type::Comment);
return m_data;
}
// Reports whether every character recorded in the 8-bit check accumulator
// fits in 8 bits. Valid only for Comment tokens.
inline bool HTMLToken::commentIsAll8BitData() const
{
    ASSERT(m_type == Type::Comment);
    const bool sawWideCharacter = m_data8BitCheck > 0xFF;
    return !sawWideCharacter;
}
// Turns an uninitialized token into a Comment token with no text yet.
inline void HTMLToken::beginComment()
{
ASSERT(m_type == Type::Uninitialized);
m_type = Type::Comment;
}
inline void HTMLToken::appendToComment(char character)
{
ASSERT(character);
ASSERT(m_type == Type::Comment);
m_data.append(character);
}
// Appends all characters of an ASCII literal to the comment text.
inline void HTMLToken::appendToComment(ASCIILiteral literal)
{
    ASSERT(m_type == Type::Comment);
    const auto* literalCharacters = literal.characters8();
    const auto literalLength = literal.length();
    m_data.append(literalCharacters, literalLength);
}
// Appends one non-null 16-bit character to the comment text and folds it
// into the 8-bit check accumulator.
inline void HTMLToken::appendToComment(UChar character)
{
    ASSERT(character);
    ASSERT(m_type == Type::Comment);
    m_data8BitCheck |= character;
    m_data.append(character);
}
// Returns the first attribute whose name matches `name` exactly (same length,
// same code units), or nullptr if there is none.
inline const HTMLToken::Attribute* findAttribute(const HTMLToken::AttributeList& attributes, Span<const UChar> name)
{
    const auto nameLength = name.size();
    for (auto& candidate : attributes) {
        // FIXME: The one caller that uses this probably wants to ignore letter case.
        if (candidate.name.size() != nameLength)
            continue;
        if (equal(candidate.name.data(), name.data(), nameLength))
            return &candidate;
    }
    return nullptr;
}
} // namespace WebCore