// blob: fe7f1914037d335bf98e48e5d6f9319309c260c7 [file] [log] [blame]
/*
Copyright (C) 2004-2016 Apple Inc. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#pragma once
#include <wtf/Deque.h>
#include <wtf/text/WTFString.h>
namespace WebCore {
// FIXME: This should not start with "k".
// FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now.
// Character value (0) used as the end-of-file marker. It is the same value that
// SegmentedString::m_currentCharacter is initialized with.
// NOTE(review): how tokenizers distinguish this marker from a literal NUL in the input is not visible in this header.
constexpr LChar kEndOfFileMarker = 0;
// Character source that is consumed one character at a time. It holds a "current" substring plus a
// queue of further substrings, and tracks the current character, the number of characters consumed
// so far and the current line. Only the inline members are defined in this header.
class SegmentedString {
public:
    SegmentedString() = default;
    SegmentedString(String&&);
    SegmentedString(const String&);

    // Construction from another SegmentedString is disallowed; assignment remains available.
    SegmentedString(SegmentedString&&) = delete;
    SegmentedString(const SegmentedString&) = delete;

    SegmentedString& operator=(SegmentedString&&);
    SegmentedString& operator=(const SegmentedString&) = default;

    void clear();
    void close();

    void append(SegmentedString&&);
    void append(const SegmentedString&);

    void append(String&&);
    void append(const String&);

    void pushBack(String&&);

    void setExcludeLineNumbers();

    // True when the current substring has no characters left.
    bool isEmpty() const { return !m_currentSubstring.length; }
    unsigned length() const;

    bool isClosed() const { return m_isClosed; }

    // Moves to the next character, starting a new line when the character left behind was '\n'
    // and line numbers are not excluded for the current substring.
    void advance();
    void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline.
    void advancePastNewline(); // Faster than calling advance when we know the current character is a newline.

    // Result of trying to consume a literal at the current position. On the inline fast path,
    // DidMatch means the literal was consumed and DidNotMatch means nothing was consumed.
    // NOTE(review): NotEnoughCharacters is presumably produced by advancePastSlowCase when the
    // buffered input is shorter than the literal; confirm against the implementation file.
    enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters };

    // Both helpers take a string literal (array including its null terminator). The literal must
    // not contain '\n'; this is asserted in the implementation.
    template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); }
    template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); }

    // Characters consumed from earlier substrings plus those consumed from the current one.
    unsigned numberOfCharactersConsumed() const;

    String toString() const;

    UChar currentCharacter() const { return m_currentCharacter; }

    OrdinalNumber currentColumn() const;
    OrdinalNumber currentLine() const;

    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog
    // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
    void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength);

private:
    // One segment of input together with a cursor into it.
    struct Substring {
        Substring() = default;
        Substring(String&&);

        UChar currentCharacter() const;
        UChar currentCharacterPreIncrement();

        unsigned numberOfCharactersConsumed() const;
        void appendTo(StringBuilder&) const;

        String string;
        // Number of characters not yet consumed; string.length() - length have been consumed.
        unsigned length { 0 };
        // Selects the active union member below. Both this flag and the pointer carry default
        // initializers so that copying or moving an empty Substring never reads an
        // indeterminate value.
        bool is8Bit { true };
        union {
            const LChar* currentCharacter8 { nullptr };
            const UChar* currentCharacter16;
        };
        bool doNotExcludeLineNumbers { true };
    };

    // Bits stored in m_fastPathFlags. They are only set while the current substring is 8-bit
    // and has more than one character left.
    enum FastPathFlags {
        NoFastPath = 0,
        Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
        Use8BitAdvance = 1 << 1,
    };

    void appendSubstring(Substring&&);

    void processPossibleNewline();
    void startNewLine();

    void advanceWithoutUpdatingLineNumber();
    void advanceWithoutUpdatingLineNumber16();
    void advanceAndUpdateLineNumber16();
    void advancePastSingleCharacterSubstringWithoutUpdatingLineNumber();
    void advancePastSingleCharacterSubstring();
    void advanceEmpty();

    void updateAdvanceFunctionPointers();
    void updateAdvanceFunctionPointersForEmptyString();
    void updateAdvanceFunctionPointersForSingleCharacterSubstring();

    void decrementAndCheckLength();

    template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase);
    template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]);
    AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase);

    Substring m_currentSubstring;
    Deque<Substring> m_otherSubstrings;

    bool m_isClosed { false };

    UChar m_currentCharacter { 0 };

    unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 };
    unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 };
    int m_currentLine { 0 };

    unsigned char m_fastPathFlags { NoFastPath };
    // Used by the advance functions whenever the 8-bit fast path flag is not set.
    void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty };
    void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty };
};
// Takes ownership of passedString and points the cursor at its first character.
// For an empty string there is nothing to point at, so the width flag and the cursor are given
// fixed values instead of being left indeterminate; otherwise a later copy or move of this
// Substring would read uninitialized members.
inline SegmentedString::Substring::Substring(String&& passedString)
    : string(WTFMove(passedString))
    , length(string.length())
{
    if (!length) {
        is8Bit = true;
        currentCharacter8 = nullptr;
        return;
    }

    is8Bit = string.impl()->is8Bit();
    if (is8Bit)
        currentCharacter8 = string.impl()->characters8();
    else
        currentCharacter16 = string.impl()->characters16();
}
// Returns how many characters of this substring have already been consumed.
inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const
{
    // `length` counts what is still left, so the consumed part is the remainder of the total.
    const unsigned totalLength = string.length();
    return totalLength - length;
}
// Returns the character under the cursor. The substring must not be empty.
ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const
{
    ASSERT(length);
    if (is8Bit)
        return *currentCharacter8;
    return *currentCharacter16;
}
// Moves the cursor forward by one character and returns the character it now points at.
// Only the cursor is touched here; `length` is left for the caller to adjust.
ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement()
{
    ASSERT(length);
    if (is8Bit) {
        ++currentCharacter8;
        return *currentCharacter8;
    }
    ++currentCharacter16;
    return *currentCharacter16;
}
// Builds a SegmentedString whose current substring is the given string.
inline SegmentedString::SegmentedString(String&& string)
    : m_currentSubstring(WTFMove(string))
{
    // An empty string keeps the default state: current character 0 and the "empty" advance functions.
    if (!m_currentSubstring.length)
        return;

    m_currentCharacter = m_currentSubstring.currentCharacter();
    updateAdvanceFunctionPointers();
}
// Copying overload: makes a copy of the string and delegates to the String&& constructor.
inline SegmentedString::SegmentedString(const String& string)
    : SegmentedString(String(string))
{
}
// Accounts for one consumed character of the current substring. Once a single character is left,
// switches the advance machinery over to the single-character-substring variant.
ALWAYS_INLINE void SegmentedString::decrementAndCheckLength()
{
    ASSERT(m_currentSubstring.length > 1);
    --m_currentSubstring.length;
    if (UNLIKELY(m_currentSubstring.length == 1))
        updateAdvanceFunctionPointersForSingleCharacterSubstring();
}
// Moves to the next character without any line bookkeeping.
ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber()
{
    // Without the 8-bit fast path flag, dispatch through the currently installed member function.
    if (UNLIKELY(!(m_fastPathFlags & Use8BitAdvance))) {
        (this->*m_advanceWithoutUpdatingLineNumberFunction)();
        return;
    }

    ++m_currentSubstring.currentCharacter8;
    m_currentCharacter = *m_currentSubstring.currentCharacter8;
    decrementAndCheckLength();
}
// Records the start of a new line: remembers the consumed-character count at this point
// and bumps the line counter.
inline void SegmentedString::startNewLine()
{
    m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed();
    m_currentLine += 1;
}
// Starts a new line if, and only if, the current character is '\n'.
inline void SegmentedString::processPossibleNewline()
{
    if (m_currentCharacter != '\n')
        return;
    startNewLine();
}
inline void SegmentedString::advance()
{
if (LIKELY(m_fastPathFlags & Use8BitAdvance)) {
ASSERT(m_currentSubstring.length > 1);
bool lastCharacterWasNewline = m_currentCharacter == '\n';
m_currentCharacter = *++m_currentSubstring.currentCharacter8;
bool haveOneCharacterLeft = --m_currentSubstring.length == 1;
if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft)))
return;
if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers))
startNewLine();
if (haveOneCharacterLeft)
updateAdvanceFunctionPointersForSingleCharacterSubstring();
return;
}
(this->*m_advanceAndUpdateLineNumberFunction)();
}
// Variant of advance() for callers that already know the current character is not '\n'
// (asserted below), so no line bookkeeping is performed.
ALWAYS_INLINE void SegmentedString::advancePastNonNewline()
{
    ASSERT(!(m_currentCharacter == '\n'));
    advanceWithoutUpdatingLineNumber();
}
// Variant of advance() for callers that already know the current character is '\n' (asserted below).
inline void SegmentedString::advancePastNewline()
{
    ASSERT(m_currentCharacter == '\n');

    // With at most one character left, dispatch through the currently installed member function.
    if (m_currentSubstring.length <= 1) {
        (this->*m_advanceAndUpdateLineNumberFunction)();
        return;
    }

    // NOTE(review): startNewLine() is called here before the cursor moves past the newline, whereas
    // the 8-bit fast path in advance() calls it afterwards. The recorded line-start offset therefore
    // differs by one between the two paths; confirm which one currentColumn() expects.
    if (m_currentSubstring.doNotExcludeLineNumbers)
        startNewLine();

    m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement();
    decrementAndCheckLength();
}
// Total number of characters consumed so far: everything from substrings already finished
// plus the consumed part of the current substring.
inline unsigned SegmentedString::numberOfCharactersConsumed() const
{
    const unsigned consumedInCurrentSubstring = m_currentSubstring.numberOfCharactersConsumed();
    return m_numberOfCharactersConsumedPriorToCurrentSubstring + consumedInCurrentSubstring;
}
// Returns true when input character `a` does not match literal character `b`.
// With lettersIgnoringASCIICase set, the comparison is delegated to isASCIIAlphaCaselessEqual.
template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase)
{
    if (!lettersIgnoringASCIICase)
        return a != b;
    return !isASCIIAlphaCaselessEqual(a, b);
}
// Tries to consume `literal` at the current position.
// The literal is a null-terminated array and must not contain '\n' (both asserted), so no line
// bookkeeping is needed here. On a mismatch in the fast path nothing is consumed.
template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator])
{
    constexpr unsigned length = lengthIncludingTerminator - 1;
    ASSERT(!literal[length]);
    ASSERT(!strchr(literal, '\n'));

    // The fast path requires more than one character to remain in the current substring after the
    // literal has been consumed; everything else goes through the slow case.
    if (length + 1 >= m_currentSubstring.length)
        return advancePastSlowCase(literal, lettersIgnoringASCIICase);

    if (m_currentSubstring.is8Bit) {
        const LChar* characters = m_currentSubstring.currentCharacter8;
        for (unsigned index = 0; index != length; ++index) {
            if (characterMismatch(characters[index], literal[index], lettersIgnoringASCIICase))
                return DidNotMatch;
        }
        m_currentSubstring.currentCharacter8 = characters + length;
        m_currentCharacter = characters[length];
    } else {
        const UChar* characters = m_currentSubstring.currentCharacter16;
        for (unsigned index = 0; index != length; ++index) {
            if (characterMismatch(characters[index], literal[index], lettersIgnoringASCIICase))
                return DidNotMatch;
        }
        m_currentSubstring.currentCharacter16 = characters + length;
        m_currentCharacter = characters[length];
    }

    m_currentSubstring.length -= length;
    return DidMatch;
}
// Selects how advancing works for the current substring, based on how many characters are left,
// whether it is 8-bit, and whether line numbers are excluded for it.
inline void SegmentedString::updateAdvanceFunctionPointers()
{
    const unsigned remaining = m_currentSubstring.length;
    if (!remaining) {
        updateAdvanceFunctionPointersForEmptyString();
        return;
    }
    if (remaining == 1) {
        updateAdvanceFunctionPointersForSingleCharacterSubstring();
        return;
    }

    // More than one character left: 8-bit substrings use the inline fast path flags only; the
    // member function pointers are left untouched in that case.
    if (m_currentSubstring.is8Bit) {
        m_fastPathFlags = m_currentSubstring.doNotExcludeLineNumbers
            ? (Use8BitAdvance | Use8BitAdvanceAndUpdateLineNumbers)
            : Use8BitAdvance;
        return;
    }

    // 16-bit substrings always go through the member function pointers.
    m_fastPathFlags = NoFastPath;
    m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16;
    m_advanceAndUpdateLineNumberFunction = m_currentSubstring.doNotExcludeLineNumbers
        ? &SegmentedString::advanceAndUpdateLineNumber16
        : &SegmentedString::advanceWithoutUpdatingLineNumber16;
}
}