| /* |
| |
| Copyright (C) 2021 Apple Inc. All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions |
| are met: |
| 1. Redistributions of source code must retain the above copyright |
| notice, this list of conditions and the following disclaimer. |
| 2. Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| |
| THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY |
| EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
| DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| */ |
| |
| #pragma once |
| |
| #include <wtf/text/StringView.h> |
| |
| namespace WTF { |
| |
| // SortedArrayMap is a map like HashMap, but it's read-only. It uses much less memory than HashMap. |
| // It uses binary search instead of hashing, so can be outperformed by HashMap for large maps. |
| // The array passed to the constructor has std::pair elements: keys first and values second. |
| // The array and the SortedArrayMap should typically both be global constant expressions. |
| |
| class SortedArrayBase { |
| protected: |
| // Some informal empirical tests indicate that arrays shorter than this are faster to |
| // search with linear search than with binary search. Even if we don't get this threshold |
| // exactly right, it's helpful for both performance and code size to use linear search at |
| // least for very small arrays, and important for performance to make sure that we use |
| // binary search for much larger ones. |
| static constexpr size_t binarySearchThreshold = 20; |
| }; |
| |
| template<typename ArrayType> class SortedArrayMap : public SortedArrayBase { |
| public: |
| using ElementType = typename std::remove_extent_t<ArrayType>; |
| using ValueType = typename ElementType::second_type; |
| |
| constexpr SortedArrayMap(const ArrayType&); |
| |
| // FIXME: To match HashMap interface better, would be nice to get the default value from traits. |
| template<typename KeyArgument> ValueType get(const KeyArgument&, const ValueType& defaultValue = { }) const; |
| |
| // FIXME: Should add a function like this to HashMap so the two kinds of maps are more interchangable. |
| template<typename KeyArgument> const ValueType* tryGet(const KeyArgument&) const; |
| |
| private: |
| const ArrayType& m_array; |
| }; |
| |
| template<typename ArrayType> class SortedArraySet : public SortedArrayBase { |
| public: |
| constexpr SortedArraySet(const ArrayType&); |
| template<typename KeyArgument> bool contains(const KeyArgument&) const; |
| |
| private: |
| const ArrayType& m_array; |
| }; |
| |
| struct ComparableStringView { |
| StringView string; |
| }; |
| |
| // NoUppercaseLettersOptimized means no characters with the 0x20 bit set. |
| // That means the strings can't include control characters, uppercase letters, or any of @[\]_. |
| enum class ASCIISubset { All, NoUppercaseLetters, NoUppercaseLettersOptimized }; |
| |
| template<ASCIISubset> struct ComparableASCIISubsetLiteral { |
| ASCIILiteral literal; |
| template<unsigned size> constexpr ComparableASCIISubsetLiteral(const char (&characters)[size]); |
| static std::optional<ComparableStringView> parse(StringView string) { return { { string } }; } |
| }; |
| |
| using ComparableASCIILiteral = ComparableASCIISubsetLiteral<ASCIISubset::All>; |
| using ComparableCaseFoldingASCIILiteral = ComparableASCIISubsetLiteral<ASCIISubset::NoUppercaseLetters>; |
| using ComparableLettersLiteral = ComparableASCIISubsetLiteral<ASCIISubset::NoUppercaseLettersOptimized>; |
| |
| template<ASCIISubset subset> constexpr bool operator==(ComparableASCIISubsetLiteral<subset>, ComparableASCIISubsetLiteral<subset>); |
| template<ASCIISubset subset> constexpr bool operator<(ComparableASCIISubsetLiteral<subset>, ComparableASCIISubsetLiteral<subset>); |
| |
| bool operator==(ComparableStringView, ComparableASCIILiteral); |
| bool operator==(ComparableStringView, ComparableCaseFoldingASCIILiteral); |
| bool operator==(ComparableStringView, ComparableLettersLiteral); |
| bool operator<(ComparableStringView, ComparableASCIILiteral); |
| bool operator<(ComparableStringView, ComparableCaseFoldingASCIILiteral); |
| bool operator<(ComparableStringView, ComparableLettersLiteral); |
| bool operator<(ComparableASCIILiteral, ComparableStringView); |
| bool operator<(ComparableCaseFoldingASCIILiteral, ComparableStringView); |
| bool operator<(ComparableLettersLiteral, ComparableStringView); |
| |
| template<typename OtherType> bool operator==(OtherType, ComparableStringView); |
| |
| template<typename StorageInteger> class PackedASCIILowerCodes { |
| public: |
| static_assert(std::is_unsigned_v<StorageInteger>); |
| |
| template<unsigned size> constexpr PackedASCIILowerCodes(const char (&characters)[size]); |
| static std::optional<PackedASCIILowerCodes> parse(StringView); |
| constexpr StorageInteger value() const { return m_value; } |
| |
| private: |
| template<unsigned size> static constexpr StorageInteger pack(const char (&characters)[size]); |
| explicit constexpr PackedASCIILowerCodes(StorageInteger); |
| StorageInteger m_value { 0 }; |
| }; |
| |
| template<typename StorageInteger> constexpr bool operator==(PackedASCIILowerCodes<StorageInteger>, PackedASCIILowerCodes<StorageInteger>); |
| template<typename StorageInteger> constexpr bool operator<(PackedASCIILowerCodes<StorageInteger>, PackedASCIILowerCodes<StorageInteger>); |
| |
| template<ASCIISubset subset> constexpr bool isInSubset(char character) |
| { |
| if (!(character && isASCII(character))) |
| return false; |
| switch (subset) { |
| case ASCIISubset::All: |
| return true; |
| case ASCIISubset::NoUppercaseLetters: |
| return !isASCIIUpper(character); |
| case ASCIISubset::NoUppercaseLettersOptimized: |
| return character == toASCIILowerUnchecked(character); |
| } |
| } |
| |
| template<ASCIISubset subset> template<unsigned size> constexpr ComparableASCIISubsetLiteral<subset>::ComparableASCIISubsetLiteral(const char (&characters)[size]) |
| : literal { ASCIILiteral::fromLiteralUnsafe(characters) } |
| { |
| ASSERT_UNDER_CONSTEXPR_CONTEXT(allOfConstExpr(&characters[0], &characters[size - 1], [] (char character) { |
| return isInSubset<subset>(character); |
| })); |
| ASSERT_UNDER_CONSTEXPR_CONTEXT(!characters[size - 1]); |
| } |
| |
| template<typename ArrayType> constexpr SortedArrayMap<ArrayType>::SortedArrayMap(const ArrayType& array) |
| : m_array { array } |
| { |
| ASSERT_UNDER_CONSTEXPR_CONTEXT(isSortedConstExpr(std::begin(array), std::end(array), [] (auto& a, auto b) { |
| return a.first < b.first; |
| })); |
| } |
| |
| template<typename, typename = void> constexpr bool HasParseMember = false; |
| template<typename T> constexpr bool HasParseMember<T, std::void_t<decltype(std::declval<T>().parse)>> = true; |
| |
| template<typename ArrayType> template<typename KeyArgument> inline auto SortedArrayMap<ArrayType>::tryGet(const KeyArgument& key) const -> const ValueType* |
| { |
| using KeyType = typename ElementType::first_type; |
| auto parsedKey = [&key] { |
| if constexpr (HasParseMember<KeyType>) |
| return KeyType::parse(key); |
| else |
| return std::make_optional(key); |
| }(); |
| if (!parsedKey) |
| return nullptr; |
| decltype(std::begin(m_array)) iterator; |
| if (std::size(m_array) < binarySearchThreshold) { |
| iterator = std::find_if(std::begin(m_array), std::end(m_array), [&parsedKey] (auto& pair) { |
| return pair.first == *parsedKey; |
| }); |
| if (iterator == std::end(m_array)) |
| return nullptr; |
| } else { |
| iterator = std::lower_bound(std::begin(m_array), std::end(m_array), *parsedKey, [] (auto& pair, auto& value) { |
| return pair.first < value; |
| }); |
| if (iterator == std::end(m_array) || !(iterator->first == *parsedKey)) |
| return nullptr; |
| } |
| return &iterator->second; |
| } |
| |
| template<typename ArrayType> template<typename KeyArgument> inline auto SortedArrayMap<ArrayType>::get(const KeyArgument& key, const ValueType& defaultValue) const -> ValueType |
| { |
| auto result = tryGet(key); |
| return result ? *result : defaultValue; |
| } |
| |
| template<typename ArrayType> constexpr SortedArraySet<ArrayType>::SortedArraySet(const ArrayType& array) |
| : m_array { array } |
| { |
| ASSERT_UNDER_CONSTEXPR_CONTEXT(isSortedConstExpr(std::begin(array), std::end(array))); |
| } |
| |
| template<typename ArrayType> template<typename KeyArgument> inline bool SortedArraySet<ArrayType>::contains(const KeyArgument& key) const |
| { |
| using KeyType = typename std::remove_extent_t<ArrayType>; |
| auto parsedKey = [&key] { |
| if constexpr (HasParseMember<KeyType>) |
| return KeyType::parse(key); |
| else |
| return std::make_optional(key); |
| }(); |
| if (!parsedKey) |
| return false; |
| if (std::size(m_array) < binarySearchThreshold) |
| return std::find(std::begin(m_array), std::end(m_array), *parsedKey) != std::end(m_array); |
| auto iterator = std::lower_bound(std::begin(m_array), std::end(m_array), *parsedKey); |
| return iterator != std::end(m_array) && *iterator == *parsedKey; |
| } |
| |
| constexpr int strcmpConstExpr(const char* a, const char* b) |
| { |
| while (*a == *b && *a && *b) { |
| ++a; |
| ++b; |
| } |
| return *a == *b ? 0 : *a < *b ? -1 : 1; |
| } |
| |
| template<typename CharacterType> inline bool lessThanASCIICaseFolding(const CharacterType* characters, unsigned length, const char* literalWithNoUppercase) |
| { |
| for (unsigned i = 0; i < length; ++i) { |
| if (!literalWithNoUppercase[i]) |
| return false; |
| if (auto character = toASCIILower(characters[i]); character != literalWithNoUppercase[i]) |
| return character < literalWithNoUppercase[i]; |
| } |
| return true; |
| } |
| |
| inline bool lessThanASCIICaseFolding(StringView string, const char* literalWithNoUppercase) |
| { |
| if (string.is8Bit()) |
| return lessThanASCIICaseFolding(string.characters8(), string.length(), literalWithNoUppercase); |
| return lessThanASCIICaseFolding(string.characters16(), string.length(), literalWithNoUppercase); |
| } |
| |
| template<typename CharacterType> inline bool lessThanASCIICaseFolding(const char* literalWithNoUppercase, const CharacterType* characters, unsigned length) |
| { |
| for (unsigned i = 0; i < length; ++i) { |
| if (!literalWithNoUppercase[i]) |
| return true; |
| if (auto character = toASCIILower(characters[i]); character != literalWithNoUppercase[i]) |
| return literalWithNoUppercase[i] < character; |
| } |
| return false; |
| } |
| |
| inline bool lessThanASCIICaseFolding(const char* literalWithNoUppercase, StringView string) |
| { |
| if (string.is8Bit()) |
| return lessThanASCIICaseFolding(literalWithNoUppercase, string.characters8(), string.length()); |
| return lessThanASCIICaseFolding(literalWithNoUppercase, string.characters16(), string.length()); |
| } |
| |
| template<ASCIISubset subset> constexpr bool operator==(ComparableASCIISubsetLiteral<subset> a, ComparableASCIISubsetLiteral<subset> b) |
| { |
| return !strcmpConstExpr(a.literal.characters(), b.literal.characters()); |
| } |
| |
| template<ASCIISubset subset> constexpr bool operator<(ComparableASCIISubsetLiteral<subset> a, ComparableASCIISubsetLiteral<subset> b) |
| { |
| return strcmpConstExpr(a.literal.characters(), b.literal.characters()) < 0; |
| } |
| |
| inline bool operator==(ComparableStringView a, ComparableASCIILiteral b) |
| { |
| return a.string == b.literal; |
| } |
| |
| inline bool operator<(ComparableStringView a, ComparableASCIILiteral b) |
| { |
| return codePointCompare(a.string, b.literal.characters()) < 0; |
| } |
| |
| inline bool operator<(ComparableASCIILiteral a, ComparableStringView b) |
| { |
| return codePointCompare(a.literal.characters(), b.string) < 0; |
| } |
| |
| inline bool operator==(ComparableStringView a, ComparableLettersLiteral b) |
| { |
| return equalLettersIgnoringASCIICaseCommonWithoutLength(a.string, b.literal); |
| } |
| |
| inline bool operator<(ComparableStringView a, ComparableLettersLiteral b) |
| { |
| return lessThanASCIICaseFolding(a.string, b.literal); |
| } |
| |
| inline bool operator<(ComparableLettersLiteral a, ComparableStringView b) |
| { |
| return lessThanASCIICaseFolding(a.literal, b.string); |
| } |
| |
| inline bool operator==(ComparableStringView a, ComparableCaseFoldingASCIILiteral b) |
| { |
| return equalIgnoringASCIICase(a.string, b.literal); |
| } |
| |
| inline bool operator<(ComparableStringView a, ComparableCaseFoldingASCIILiteral b) |
| { |
| return lessThanASCIICaseFolding(a.string, b.literal); |
| } |
| |
| inline bool operator<(ComparableCaseFoldingASCIILiteral a, ComparableStringView b) |
| { |
| return lessThanASCIICaseFolding(a.literal, b.string); |
| } |
| |
| template<typename OtherType> inline bool operator==(OtherType a, ComparableStringView b) |
| { |
| return b == a; |
| } |
| |
| template<typename StorageInteger> template<unsigned size> constexpr PackedASCIILowerCodes<StorageInteger>::PackedASCIILowerCodes(const char (&string)[size]) |
| : m_value { pack(string) } |
| { |
| } |
| |
| template<typename StorageInteger> constexpr PackedASCIILowerCodes<StorageInteger>::PackedASCIILowerCodes(StorageInteger value) |
| : m_value { value } |
| { |
| } |
| |
| template<typename StorageInteger> template<unsigned size> constexpr StorageInteger PackedASCIILowerCodes<StorageInteger>::pack(const char (&string)[size]) |
| { |
| ASSERT_UNDER_CONSTEXPR_CONTEXT(size); |
| constexpr unsigned length = size - 1; |
| ASSERT_UNDER_CONSTEXPR_CONTEXT(!string[length]); |
| ASSERT_UNDER_CONSTEXPR_CONTEXT(length <= sizeof(StorageInteger)); |
| StorageInteger result = 0; |
| for (unsigned index = 0; index < length; ++index) { |
| StorageInteger code = static_cast<uint8_t>(string[index]); |
| result |= code << ((sizeof(StorageInteger) - index - 1) * 8); |
| } |
| return result; |
| } |
| |
| template<typename StorageInteger> auto PackedASCIILowerCodes<StorageInteger>::parse(StringView string) -> std::optional<PackedASCIILowerCodes> |
| { |
| if (string.length() > sizeof(StorageInteger)) |
| return std::nullopt; |
| StorageInteger result = 0; |
| for (unsigned index = 0; index < string.length(); ++index) { |
| UChar code = string[index]; |
| if (!isASCII(code)) |
| return std::nullopt; |
| result |= static_cast<StorageInteger>(toASCIILower(code)) << ((sizeof(StorageInteger) - index - 1) * 8); |
| } |
| return PackedASCIILowerCodes(result); |
| } |
| |
| template<typename StorageInteger> constexpr bool operator==(PackedASCIILowerCodes<StorageInteger> a, PackedASCIILowerCodes<StorageInteger> b) |
| { |
| return a.value() == b.value(); |
| } |
| |
| template<typename StorageInteger> constexpr bool operator<(PackedASCIILowerCodes<StorageInteger> a, PackedASCIILowerCodes<StorageInteger> b) |
| { |
| return a.value() < b.value(); |
| } |
| |
| } |
| |
| // FIXME: Rename the Comparable and Packed types for clarity and to align them better with each other. |
| using WTF::ASCIISubset; |
| using WTF::ComparableASCIILiteral; |
| using WTF::ComparableASCIISubsetLiteral; |
| using WTF::ComparableCaseFoldingASCIILiteral; |
| using WTF::ComparableLettersLiteral; |
| using WTF::PackedASCIILowerCodes; |
| using WTF::SortedArrayMap; |
| using WTF::SortedArraySet; |