/*
    Copyright (C) 2004-2016 Apple Inc. All rights reserved.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB. If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/
| |
| #pragma once |
| |
| #include <wtf/Deque.h> |
| #include <wtf/text/WTFString.h> |
| |
| namespace WebCore { |
| |
| // FIXME: This should not start with "k". |
| // FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now. |
| constexpr LChar kEndOfFileMarker = 0; |
| |
| class SegmentedString { |
| public: |
| SegmentedString() = default; |
| SegmentedString(String&&); |
| SegmentedString(const String&); |
| |
| SegmentedString(SegmentedString&&) = delete; |
| SegmentedString(const SegmentedString&) = delete; |
| |
| SegmentedString& operator=(SegmentedString&&); |
| SegmentedString& operator=(const SegmentedString&) = default; |
| |
| void clear(); |
| void close(); |
| |
| void append(SegmentedString&&); |
| void append(const SegmentedString&); |
| |
| void append(String&&); |
| void append(const String&); |
| |
| void pushBack(String&&); |
| |
| void setExcludeLineNumbers(); |
| |
| bool isEmpty() const { return !m_currentSubstring.length; } |
| unsigned length() const; |
| |
| bool isClosed() const { return m_isClosed; } |
| |
| void advance(); |
| void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline. |
| void advancePastNewline(); // Faster than calling advance when we know the current character is a newline. |
| |
| enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters }; |
| template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); } |
| template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); } |
| |
| unsigned numberOfCharactersConsumed() const; |
| |
| String toString() const; |
| |
| UChar currentCharacter() const { return m_currentCharacter; } |
| |
| OrdinalNumber currentColumn() const; |
| OrdinalNumber currentLine() const; |
| |
| // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog |
| // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. |
| void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength); |
| |
| private: |
| struct Substring { |
| Substring() = default; |
| Substring(String&&); |
| |
| UChar currentCharacter() const; |
| UChar currentCharacterPreIncrement(); |
| |
| unsigned numberOfCharactersConsumed() const; |
| void appendTo(StringBuilder&) const; |
| |
| String string; |
| unsigned length { 0 }; |
| bool is8Bit; |
| union { |
| const LChar* currentCharacter8; |
| const UChar* currentCharacter16; |
| }; |
| bool doNotExcludeLineNumbers { true }; |
| }; |
| |
| enum FastPathFlags { |
| NoFastPath = 0, |
| Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, |
| Use8BitAdvance = 1 << 1, |
| }; |
| |
| void appendSubstring(Substring&&); |
| |
| void processPossibleNewline(); |
| void startNewLine(); |
| |
| void advanceWithoutUpdatingLineNumber(); |
| void advanceWithoutUpdatingLineNumber16(); |
| void advanceAndUpdateLineNumber16(); |
| void advancePastSingleCharacterSubstringWithoutUpdatingLineNumber(); |
| void advancePastSingleCharacterSubstring(); |
| void advanceEmpty(); |
| |
| void updateAdvanceFunctionPointers(); |
| void updateAdvanceFunctionPointersForEmptyString(); |
| void updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
| |
| void decrementAndCheckLength(); |
| |
| template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase); |
| template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]); |
| AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase); |
| |
| Substring m_currentSubstring; |
| Deque<Substring> m_otherSubstrings; |
| |
| bool m_isClosed { false }; |
| |
| UChar m_currentCharacter { 0 }; |
| |
| unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 }; |
| unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 }; |
| int m_currentLine { 0 }; |
| |
| unsigned char m_fastPathFlags { NoFastPath }; |
| void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty }; |
| void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty }; |
| }; |
| |
| inline SegmentedString::Substring::Substring(String&& passedString) |
| : string(WTFMove(passedString)) |
| , length(string.length()) |
| { |
| if (length) { |
| is8Bit = string.impl()->is8Bit(); |
| if (is8Bit) |
| currentCharacter8 = string.impl()->characters8(); |
| else |
| currentCharacter16 = string.impl()->characters16(); |
| } |
| } |
| |
| inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const |
| { |
| return string.length() - length; |
| } |
| |
| ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const |
| { |
| ASSERT(length); |
| return is8Bit ? *currentCharacter8 : *currentCharacter16; |
| } |
| |
| ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement() |
| { |
| ASSERT(length); |
| return is8Bit ? *++currentCharacter8 : *++currentCharacter16; |
| } |
| |
| inline SegmentedString::SegmentedString(String&& string) |
| : m_currentSubstring(WTFMove(string)) |
| { |
| if (m_currentSubstring.length) { |
| m_currentCharacter = m_currentSubstring.currentCharacter(); |
| updateAdvanceFunctionPointers(); |
| } |
| } |
| |
| inline SegmentedString::SegmentedString(const String& string) |
| : SegmentedString(String { string }) |
| { |
| } |
| |
| ALWAYS_INLINE void SegmentedString::decrementAndCheckLength() |
| { |
| ASSERT(m_currentSubstring.length > 1); |
| if (UNLIKELY(--m_currentSubstring.length == 1)) |
| updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
| } |
| |
| ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber() |
| { |
| if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { |
| m_currentCharacter = *++m_currentSubstring.currentCharacter8; |
| decrementAndCheckLength(); |
| return; |
| } |
| |
| (this->*m_advanceWithoutUpdatingLineNumberFunction)(); |
| } |
| |
| inline void SegmentedString::startNewLine() |
| { |
| ++m_currentLine; |
| m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed(); |
| } |
| |
| inline void SegmentedString::processPossibleNewline() |
| { |
| if (m_currentCharacter == '\n') |
| startNewLine(); |
| } |
| |
| inline void SegmentedString::advance() |
| { |
| if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { |
| ASSERT(m_currentSubstring.length > 1); |
| bool lastCharacterWasNewline = m_currentCharacter == '\n'; |
| m_currentCharacter = *++m_currentSubstring.currentCharacter8; |
| bool haveOneCharacterLeft = --m_currentSubstring.length == 1; |
| if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft))) |
| return; |
| if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers)) |
| startNewLine(); |
| if (haveOneCharacterLeft) |
| updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
| return; |
| } |
| |
| (this->*m_advanceAndUpdateLineNumberFunction)(); |
| } |
| |
| ALWAYS_INLINE void SegmentedString::advancePastNonNewline() |
| { |
| ASSERT(m_currentCharacter != '\n'); |
| advanceWithoutUpdatingLineNumber(); |
| } |
| |
| inline void SegmentedString::advancePastNewline() |
| { |
| ASSERT(m_currentCharacter == '\n'); |
| if (m_currentSubstring.length > 1) { |
| if (m_currentSubstring.doNotExcludeLineNumbers) |
| startNewLine(); |
| m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement(); |
| decrementAndCheckLength(); |
| return; |
| } |
| |
| (this->*m_advanceAndUpdateLineNumberFunction)(); |
| } |
| |
| inline unsigned SegmentedString::numberOfCharactersConsumed() const |
| { |
| return m_numberOfCharactersConsumedPriorToCurrentSubstring + m_currentSubstring.numberOfCharactersConsumed(); |
| } |
| |
| template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase) |
| { |
| return lettersIgnoringASCIICase ? !isASCIIAlphaCaselessEqual(a, b) : a != b; |
| } |
| |
| template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator]) |
| { |
| constexpr unsigned length = lengthIncludingTerminator - 1; |
| ASSERT(!literal[length]); |
| ASSERT(!strchr(literal, '\n')); |
| if (length + 1 < m_currentSubstring.length) { |
| if (m_currentSubstring.is8Bit) { |
| for (unsigned i = 0; i < length; ++i) { |
| if (characterMismatch(m_currentSubstring.currentCharacter8[i], literal[i], lettersIgnoringASCIICase)) |
| return DidNotMatch; |
| } |
| m_currentSubstring.currentCharacter8 += length; |
| m_currentCharacter = *m_currentSubstring.currentCharacter8; |
| } else { |
| for (unsigned i = 0; i < length; ++i) { |
| if (characterMismatch(m_currentSubstring.currentCharacter16[i], literal[i], lettersIgnoringASCIICase)) |
| return DidNotMatch; |
| } |
| m_currentSubstring.currentCharacter16 += length; |
| m_currentCharacter = *m_currentSubstring.currentCharacter16; |
| } |
| m_currentSubstring.length -= length; |
| return DidMatch; |
| } |
| return advancePastSlowCase(literal, lettersIgnoringASCIICase); |
| } |
| |
| inline void SegmentedString::updateAdvanceFunctionPointers() |
| { |
| if (m_currentSubstring.length > 1) { |
| if (m_currentSubstring.is8Bit) { |
| m_fastPathFlags = Use8BitAdvance; |
| if (m_currentSubstring.doNotExcludeLineNumbers) |
| m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; |
| return; |
| } |
| m_fastPathFlags = NoFastPath; |
| m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; |
| if (m_currentSubstring.doNotExcludeLineNumbers) |
| m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceAndUpdateLineNumber16; |
| else |
| m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; |
| return; |
| } |
| |
| if (!m_currentSubstring.length) { |
| updateAdvanceFunctionPointersForEmptyString(); |
| return; |
| } |
| |
| updateAdvanceFunctionPointersForSingleCharacterSubstring(); |
| } |
| |
| } |