blob: 9dd7523b1501cdd181eb279d8313baea1abe3970 [file] [log] [blame]
/*
* Copyright (C) 2015-2019 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <algorithm>
#include <unicode/uchar.h>
#include <wtf/ASCIICType.h>
#include <wtf/NotFound.h>
#include <wtf/UnalignedAccess.h>
#include <wtf/text/ASCIILiteral.h>
namespace WTF {
template<typename CharacterType> inline bool isLatin1(CharacterType character)
{
using UnsignedCharacterType = typename std::make_unsigned<CharacterType>::type;
return static_cast<UnsignedCharacterType>(character) <= static_cast<UnsignedCharacterType>(0xFF);
}
using CodeUnitMatchFunction = bool (*)(UChar);
template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, const CharacterTypeB*, unsigned length);
template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, unsigned lengthA, const CharacterTypeB*, unsigned lengthB);
template<typename StringClassA, typename StringClassB> bool equalIgnoringASCIICaseCommon(const StringClassA&, const StringClassB&);
template<typename CharacterType> bool equalLettersIgnoringASCIICase(const CharacterType*, const char* lowercaseLetters, unsigned length);
template<typename CharacterType> bool equalLettersIgnoringASCIICase(const CharacterType*, unsigned charactersLength, ASCIILiteral);
template<typename StringClass> bool equalLettersIgnoringASCIICaseCommon(const StringClass&, ASCIILiteral);
bool equalIgnoringASCIICase(const char*, const char*);
bool equalLettersIgnoringASCIICase(const char*, ASCIILiteral);
// Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe.
#if (CPU(X86_64) || CPU(ARM64)) && !ASAN_ENABLED
ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length)
{
unsigned dwordLength = length >> 3;
const char* a = reinterpret_cast<const char*>(aLChar);
const char* b = reinterpret_cast<const char*>(bLChar);
if (dwordLength) {
for (unsigned i = 0; i != dwordLength; ++i) {
if (unalignedLoad<uint64_t>(a) != unalignedLoad<uint64_t>(b))
return false;
a += sizeof(uint64_t);
b += sizeof(uint64_t);
}
}
if (length & 4) {
if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b))
return false;
a += sizeof(uint32_t);
b += sizeof(uint32_t);
}
if (length & 2) {
if (unalignedLoad<uint16_t>(a) != unalignedLoad<uint16_t>(b))
return false;
a += sizeof(uint16_t);
b += sizeof(uint16_t);
}
if (length & 1 && (*reinterpret_cast<const LChar*>(a) != *reinterpret_cast<const LChar*>(b)))
return false;
return true;
}
ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length)
{
unsigned dwordLength = length >> 2;
const char* a = reinterpret_cast<const char*>(aUChar);
const char* b = reinterpret_cast<const char*>(bUChar);
if (dwordLength) {
for (unsigned i = 0; i != dwordLength; ++i) {
if (unalignedLoad<uint64_t>(a) != unalignedLoad<uint64_t>(b))
return false;
a += sizeof(uint64_t);
b += sizeof(uint64_t);
}
}
if (length & 2) {
if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b))
return false;
a += sizeof(uint32_t);
b += sizeof(uint32_t);
}
if (length & 1 && (*reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b)))
return false;
return true;
}
#elif CPU(X86) && !ASAN_ENABLED
ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length)
{
const char* a = reinterpret_cast<const char*>(aLChar);
const char* b = reinterpret_cast<const char*>(bLChar);
unsigned wordLength = length >> 2;
for (unsigned i = 0; i != wordLength; ++i) {
if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b))
return false;
a += sizeof(uint32_t);
b += sizeof(uint32_t);
}
length &= 3;
if (length) {
const LChar* aRemainder = reinterpret_cast<const LChar*>(a);
const LChar* bRemainder = reinterpret_cast<const LChar*>(b);
for (unsigned i = 0; i < length; ++i) {
if (aRemainder[i] != bRemainder[i])
return false;
}
}
return true;
}
ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length)
{
const char* a = reinterpret_cast<const char*>(aUChar);
const char* b = reinterpret_cast<const char*>(bUChar);
unsigned wordLength = length >> 1;
for (unsigned i = 0; i != wordLength; ++i) {
if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b))
return false;
a += sizeof(uint32_t);
b += sizeof(uint32_t);
}
if (length & 1 && *reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b))
return false;
return true;
}
#elif OS(DARWIN) && WTF_ARM_ARCH_AT_LEAST(7) && !ASAN_ENABLED
ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length)
{
bool isEqual = false;
uint32_t aValue;
uint32_t bValue;
asm("subs %[length], #4\n"
"blo 2f\n"
"0:\n" // Label 0 = Start of loop over 32 bits.
"ldr %[aValue], [%[a]], #4\n"
"ldr %[bValue], [%[b]], #4\n"
"cmp %[aValue], %[bValue]\n"
"bne 66f\n"
"subs %[length], #4\n"
"bhs 0b\n"
// At this point, length can be:
// -0: 00000000000000000000000000000000 (0 bytes left)
// -1: 11111111111111111111111111111111 (3 bytes left)
// -2: 11111111111111111111111111111110 (2 bytes left)
// -3: 11111111111111111111111111111101 (1 byte left)
// -4: 11111111111111111111111111111100 (length was 0)
// The pointers are at the correct position.
"2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters.
"tst %[length], #2\n"
"beq 1f\n"
"ldrh %[aValue], [%[a]], #2\n"
"ldrh %[bValue], [%[b]], #2\n"
"cmp %[aValue], %[bValue]\n"
"bne 66f\n"
"1:\n" // Label 1 = Check for a single character left.
"tst %[length], #1\n"
"beq 42f\n"
"ldrb %[aValue], [%[a]]\n"
"ldrb %[bValue], [%[b]]\n"
"cmp %[aValue], %[bValue]\n"
"bne 66f\n"
"42:\n" // Label 42 = Success.
"mov %[isEqual], #1\n"
"66:\n" // Label 66 = End without changing isEqual to 1.
: [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue)
:
:
);
return isEqual;
}
ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length)
{
bool isEqual = false;
uint32_t aValue;
uint32_t bValue;
asm("subs %[length], #2\n"
"blo 1f\n"
"0:\n" // Label 0 = Start of loop over 32 bits.
"ldr %[aValue], [%[a]], #4\n"
"ldr %[bValue], [%[b]], #4\n"
"cmp %[aValue], %[bValue]\n"
"bne 66f\n"
"subs %[length], #2\n"
"bhs 0b\n"
// At this point, length can be:
// -0: 00000000000000000000000000000000 (0 bytes left)
// -1: 11111111111111111111111111111111 (1 character left, 2 bytes)
// -2: 11111111111111111111111111111110 (length was zero)
// The pointers are at the correct position.
"1:\n" // Label 1 = Check for a single character left.
"tst %[length], #1\n"
"beq 42f\n"
"ldrh %[aValue], [%[a]]\n"
"ldrh %[bValue], [%[b]]\n"
"cmp %[aValue], %[bValue]\n"
"bne 66f\n"
"42:\n" // Label 42 = Success.
"mov %[isEqual], #1\n"
"66:\n" // Label 66 = End without changing isEqual to 1.
: [length]"+r"(length), [isEqual]"+r"(isEqual), [a]"+r"(a), [b]"+r"(b), [aValue]"+r"(aValue), [bValue]"+r"(bValue)
:
:
);
return isEqual;
}
#elif !ASAN_ENABLED
ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); }
ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); }
#else
ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length)
{
for (unsigned i = 0; i < length; ++i) {
if (a[i] != b[i])
return false;
}
return true;
}
ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length)
{
for (unsigned i = 0; i < length; ++i) {
if (a[i] != b[i])
return false;
}
return true;
}
#endif
ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length)
{
for (unsigned i = 0; i < length; ++i) {
if (a[i] != b[i])
return false;
}
return true;
}
ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); }
template<typename StringClassA, typename StringClassB>
ALWAYS_INLINE bool equalCommon(const StringClassA& a, const StringClassB& b)
{
unsigned length = a.length();
if (length != b.length())
return false;
if (a.is8Bit()) {
if (b.is8Bit())
return equal(a.characters8(), b.characters8(), length);
return equal(a.characters8(), b.characters16(), length);
}
if (b.is8Bit())
return equal(a.characters16(), b.characters8(), length);
return equal(a.characters16(), b.characters16(), length);
}
template<typename StringClassA, typename StringClassB>
ALWAYS_INLINE bool equalCommon(const StringClassA* a, const StringClassB* b)
{
if (a == b)
return true;
if (!a || !b)
return false;
return equal(*a, *b);
}
template<typename StringClass, unsigned length> bool equal(const StringClass& a, const UChar (&codeUnits)[length])
{
if (a.length() != length)
return false;
if (a.is8Bit())
return equal(a.characters8(), codeUnits, length);
return equal(a.characters16(), codeUnits, length);
}
template<typename CharacterTypeA, typename CharacterTypeB>
inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB* b, unsigned length)
{
for (unsigned i = 0; i < length; ++i) {
if (toASCIILower(a[i]) != toASCIILower(b[i]))
return false;
}
return true;
}
template<typename CharacterTypeA, typename CharacterTypeB> inline bool equalIgnoringASCIICase(const CharacterTypeA* a, unsigned lengthA, const CharacterTypeB* b, unsigned lengthB)
{
return lengthA == lengthB && equalIgnoringASCIICase(a, b, lengthA);
}
template<typename StringClassA, typename StringClassB>
bool equalIgnoringASCIICaseCommon(const StringClassA& a, const StringClassB& b)
{
unsigned length = a.length();
if (length != b.length())
return false;
if (a.is8Bit()) {
if (b.is8Bit())
return equalIgnoringASCIICase(a.characters8(), b.characters8(), length);
return equalIgnoringASCIICase(a.characters8(), b.characters16(), length);
}
if (b.is8Bit())
return equalIgnoringASCIICase(a.characters16(), b.characters8(), length);
return equalIgnoringASCIICase(a.characters16(), b.characters16(), length);
}
template<typename StringClassA> bool equalIgnoringASCIICaseCommon(const StringClassA& a, const char* b)
{
unsigned length = a.length();
if (length != strlen(b))
return false;
if (a.is8Bit())
return equalIgnoringASCIICase(a.characters8(), b, length);
return equalIgnoringASCIICase(a.characters16(), b, length);
}
template <typename SearchCharacterType, typename MatchCharacterType>
size_t findIgnoringASCIICase(const SearchCharacterType* source, const MatchCharacterType* matchCharacters, unsigned startOffset, unsigned searchLength, unsigned matchLength)
{
ASSERT(searchLength >= matchLength);
const SearchCharacterType* startSearchedCharacters = source + startOffset;
// delta is the number of additional times to test; delta == 0 means test only once.
unsigned delta = searchLength - matchLength;
for (unsigned i = 0; i <= delta; ++i) {
if (equalIgnoringASCIICase(startSearchedCharacters + i, matchCharacters, matchLength))
return startOffset + i;
}
return notFound;
}
inline size_t findIgnoringASCIICaseWithoutLength(const char* source, const char* matchCharacters)
{
unsigned searchLength = strlen(source);
unsigned matchLength = strlen(matchCharacters);
return matchLength < searchLength ? findIgnoringASCIICase(source, matchCharacters, 0, searchLength, matchLength) : notFound;
}
template <typename SearchCharacterType, typename MatchCharacterType>
ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
{
// Optimization: keep a running hash of the strings,
// only call equal() if the hashes match.
// delta is the number of additional times to test; delta == 0 means test only once.
unsigned delta = searchLength - matchLength;
unsigned searchHash = 0;
unsigned matchHash = 0;
for (unsigned i = 0; i < matchLength; ++i) {
searchHash += searchCharacters[i];
matchHash += matchCharacters[i];
}
unsigned i = 0;
// keep looping until we match
while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
if (i == delta)
return notFound;
searchHash += searchCharacters[i + matchLength];
searchHash -= searchCharacters[i];
++i;
}
return index + i;
}
template<typename CharacterType, std::enable_if_t<std::is_integral_v<CharacterType>>* = nullptr>
inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0)
{
while (index < length) {
if (characters[index] == matchCharacter)
return index;
++index;
}
return notFound;
}
ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0)
{
return find(characters, length, static_cast<UChar>(matchCharacter), index);
}
inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
{
if (!isLatin1(matchCharacter))
return notFound;
return find(characters, length, static_cast<LChar>(matchCharacter), index);
}
template <typename SearchCharacterType, typename MatchCharacterType>
ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned start, unsigned length, unsigned matchLength)
{
// Optimization: keep a running hash of the strings,
// only call equal if the hashes match.
// delta is the number of additional times to test; delta == 0 means test only once.
unsigned delta = std::min(start, length - matchLength);
unsigned searchHash = 0;
unsigned matchHash = 0;
for (unsigned i = 0; i < matchLength; ++i) {
searchHash += searchCharacters[delta + i];
matchHash += matchCharacters[i];
}
// keep looping until we match
while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
if (!delta)
return notFound;
--delta;
searchHash -= searchCharacters[delta + matchLength];
searchHash += searchCharacters[delta];
}
return delta;
}
// This is marked inline since it's mostly used in non-inline functions for each string type.
// When used directly in code it's probably OK to be inline; maybe the loop will be unrolled.
template<typename CharacterType> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, const char* lowercaseLetters, unsigned length)
{
for (unsigned i = 0; i < length; ++i) {
if (!isASCIIAlphaCaselessEqual(characters[i], lowercaseLetters[i]))
return false;
}
return true;
}
template<typename CharacterType> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, unsigned charactersLength, ASCIILiteral literal)
{
unsigned literalLength = literal.length();
return charactersLength == literalLength && equalLettersIgnoringASCIICase(characters, literal.characters(), literalLength);
}
template<typename StringClass> bool inline hasPrefixWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char* lowercaseLetters, unsigned length)
{
#if ASSERT_ENABLED
ASSERT(*lowercaseLetters);
for (const char* letter = lowercaseLetters; *letter; ++letter)
ASSERT(toASCIILowerUnchecked(*letter) == *letter);
#endif
ASSERT(string.length() >= length);
if (string.is8Bit())
return equalLettersIgnoringASCIICase(string.characters8(), lowercaseLetters, length);
return equalLettersIgnoringASCIICase(string.characters16(), lowercaseLetters, length);
}
// This is intentionally not marked inline because it's used often and is not speed-critical enough to want it inlined everywhere.
template<typename StringClass> bool equalLettersIgnoringASCIICaseCommon(const StringClass& string, const char* literal, unsigned literalLength)
{
unsigned length = string.length();
if (length != literalLength)
return false;
return hasPrefixWithLettersIgnoringASCIICaseCommon(string, literal, length);
}
template<typename StringClass> bool startsWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char* prefix, unsigned prefixLength)
{
if (!prefixLength)
return true;
if (string.length() < prefixLength)
return false;
return hasPrefixWithLettersIgnoringASCIICaseCommon(string, prefix, prefixLength);
}
template<typename StringClass> inline bool equalLettersIgnoringASCIICaseCommon(const StringClass& string, ASCIILiteral literal)
{
return equalLettersIgnoringASCIICaseCommon(string, literal.characters(), literal.length());
}
template<typename StringClass> inline bool startsWithLettersIgnoringASCIICaseCommon(const StringClass& string, ASCIILiteral literal)
{
return startsWithLettersIgnoringASCIICaseCommon(string, literal.characters(), literal.length());
}
inline bool equalIgnoringASCIICase(const char* a, const char* b)
{
auto length = strlen(a);
return length == strlen(b) && equalIgnoringASCIICase(a, b, length);
}
inline bool equalLettersIgnoringASCIICase(ASCIILiteral a, ASCIILiteral b)
{
auto bLength = b.length();
return a.length() == bLength && equalLettersIgnoringASCIICase(a.characters(), b.characters(), bLength);
}
inline bool equalLettersIgnoringASCIICase(const char* string, ASCIILiteral literal)
{
auto literalLength = literal.length();
return strlen(string) == literalLength && equalLettersIgnoringASCIICase(string, literal.characters(), literalLength);
}
inline bool equalIgnoringASCIICase(const char* string, ASCIILiteral literal)
{
auto literalLength = literal.length();
return strlen(string) == literal.length() && equalIgnoringASCIICase(string, literal.characters(), literalLength);
}
inline bool equalIgnoringASCIICase(ASCIILiteral a, ASCIILiteral b)
{
return equalIgnoringASCIICase(a.characters(), a.length(), b.characters(), b.length());
}
}
using WTF::equalIgnoringASCIICase;
using WTF::equalLettersIgnoringASCIICase;
using WTF::isLatin1;