Source/WTF/wtf/text/WTFString.cpp - WebKit - Git at Google

 /*
  * (C) 1999 Lars Knoll (knoll@kde.org)
  * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
  * Copyright (C) 2007-2009 Torch Mobile, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  */

 #include "config.h"
 #include "WTFString.h"

 #include "IntegerToStringConversion.h"
 #include <stdarg.h>
 #include <wtf/ASCIICType.h>
 #include <wtf/DataLog.h>
 #include <wtf/HexNumber.h>
 #include <wtf/MathExtras.h>
 #include <wtf/NeverDestroyed.h>
 #include <wtf/text/CString.h>
 #include <wtf/Vector.h>
 #include <wtf/dtoa.h>
 #include <wtf/unicode/CharacterNames.h>
 #include <wtf/unicode/UTF8.h>

 namespace WTF {

 using namespace Unicode;

 // Construct a string from `length` UTF-16 code units.
 // A null `characters` pointer leaves m_impl unset.
 String::String(const UChar* characters, unsigned length)
 {
     if (!characters)
         return;

     m_impl = StringImpl::create(characters, length);
 }

 // Construct a string from null-terminated UTF-16 data; the length is
 // measured with lengthOfNullTerminatedString(). A null pointer leaves m_impl unset.
 String::String(const UChar* nullTerminatedString)
 {
     if (!nullTerminatedString)
         return;

     const auto measuredLength = lengthOfNullTerminatedString(nullTerminatedString);
     m_impl = StringImpl::create(nullTerminatedString, measuredLength);
 }

 // Construct a string from `length` Latin-1 characters.
 // A null `characters` pointer leaves m_impl unset.
 String::String(const LChar* characters, unsigned length)
 {
     if (!characters)
         return;

     m_impl = StringImpl::create(characters, length);
 }

 // Construct a string from `length` chars, reinterpreted as LChar data.
 // A null `characters` pointer leaves m_impl unset.
 String::String(const char* characters, unsigned length)
 {
     if (!characters)
         return;

     const LChar* latin1Characters = reinterpret_cast<const LChar*>(characters);
     m_impl = StringImpl::create(latin1Characters, length);
 }

 // Construct a string from null-terminated Latin-1 data.
 // A null pointer leaves m_impl unset.
 String::String(const LChar* nullTerminatedString)
 {
     if (!nullTerminatedString)
         return;

     m_impl = StringImpl::create(nullTerminatedString);
 }

 // Construct a string from a null-terminated char buffer, reinterpreted as LChar data.
 // A null pointer leaves m_impl unset.
 String::String(const char* nullTerminatedString)
 {
     if (!nullTerminatedString)
         return;

     const LChar* latin1Characters = reinterpret_cast<const LChar*>(nullTerminatedString);
     m_impl = StringImpl::create(latin1Characters);
 }

 String::String(ASCIILiteral characters)
     : m_impl(StringImpl::createFromLiteral(characters))
 {
 }

 void String::append(const String& otherString)
 {
     // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.

     if (!m_impl) {
         m_impl = otherString.m_impl;
         return;
     }

     if (otherString.isEmpty())
         return;

     auto length = m_impl->length();
     auto otherLength = otherString.m_impl->length();
     if (otherLength > std::numeric_limits<unsigned>::max() - length)
         CRASH();

     if (m_impl->is8Bit() && otherString.m_impl->is8Bit()) {
         LChar* data;
         auto newImpl = StringImpl::createUninitialized(length + otherLength, data);
         StringImpl::copyCharacters(data, m_impl->characters8(), length);
         StringImpl::copyCharacters(data + length, otherString.m_impl->characters8(), otherLength);
         m_impl = WTFMove(newImpl);
         return;
     }
     UChar* data;
     auto newImpl = StringImpl::createUninitialized(length + otherLength, data);
     StringView(*m_impl).getCharactersWithUpconvert(data);
     StringView(*otherString.m_impl).getCharactersWithUpconvert(data + length);
     m_impl = WTFMove(newImpl);
 }

 // Appends one Latin-1 character. A 16-bit string defers to the UChar overload;
 // an 8-bit string is rebuilt one character longer.
 void String::append(LChar character)
 {
     // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.

     if (!m_impl) {
         m_impl = StringImpl::create(&character, 1);
         return;
     }

     if (!is8Bit()) {
         append(static_cast<UChar>(character));
         return;
     }

     const auto oldLength = m_impl->length();
     // Growing by one must not wrap the unsigned length.
     if (oldLength >= std::numeric_limits<unsigned>::max())
         CRASH();

     LChar* buffer;
     auto grown = StringImpl::createUninitialized(oldLength + 1, buffer);
     StringImpl::copyCharacters(buffer, m_impl->characters8(), oldLength);
     buffer[oldLength] = character;
     m_impl = WTFMove(grown);
 }

 // Appends one UTF-16 code unit. When the code unit fits in Latin-1 and the string
 // is 8-bit, the LChar overload is used so the string stays 8-bit; otherwise the
 // string is rebuilt as 16-bit, one code unit longer.
 void String::append(UChar character)
 {
     // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.

     if (!m_impl) {
         m_impl = StringImpl::create(&character, 1);
         return;
     }

     if (character <= 0xFF && is8Bit()) {
         append(static_cast<LChar>(character));
         return;
     }

     const auto oldLength = m_impl->length();
     // Growing by one must not wrap the unsigned length.
     if (oldLength >= std::numeric_limits<unsigned>::max())
         CRASH();

     UChar* buffer;
     auto grown = StringImpl::createUninitialized(oldLength + 1, buffer);
     StringView(*m_impl).getCharactersWithUpconvert(buffer);
     buffer[oldLength] = character;
     m_impl = WTFMove(grown);
 }

 // Compares two Strings by forwarding their impl() values to the
 // codePointCompare overload that takes those; returns that overload's result.
 int codePointCompare(const String& a, const String& b)
 {
     return codePointCompare(a.impl(), b.impl());
 }

 void String::insert(const String& string, unsigned position)
 {
     // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.

     unsigned lengthToInsert = string.length();

     if (!lengthToInsert) {
         if (string.isNull())
             return;
         if (isNull())
             m_impl = string.impl();
         return;
     }

     if (position >= length()) {
         append(string);
         return;
     }

     if (lengthToInsert > std::numeric_limits<unsigned>::max() - length())
         CRASH();

     if (is8Bit() && string.is8Bit()) {
         LChar* data;
         auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data);
         StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data);
         StringView(string).getCharactersWithUpconvert(data + position);
         StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert);
         m_impl = WTFMove(newString);
     } else {
         UChar* data;
         auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data);
         StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data);
         StringView(string).getCharactersWithUpconvert(data + position);
         StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert);
         m_impl = WTFMove(newString);
     }
 }

 void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)
 {
     // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.

     if (!m_impl) {
         if (!charactersToAppend)
             return;
         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
         return;
     }

     if (!lengthToAppend)
         return;

     ASSERT(charactersToAppend);

     unsigned strLength = m_impl->length();

     if (m_impl->is8Bit()) {
         if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength)
             CRASH();
         LChar* data;
         auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
         StringImpl::copyCharacters(data, m_impl->characters8(), strLength);
         StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
         m_impl = WTFMove(newImpl);
         return;
     }

     if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength)
         CRASH();
     UChar* data;
     auto newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
     StringImpl::copyCharacters(data, m_impl->characters16(), strLength);
     StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
     m_impl = WTFMove(newImpl);
 }

 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
 {
     // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.

     if (!m_impl) {
         if (!charactersToAppend)
             return;
         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
         return;
     }

     if (!lengthToAppend)
         return;

     unsigned strLength = m_impl->length();

     ASSERT(charactersToAppend);
     if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength)
         CRASH();
     UChar* data;
     auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
     if (m_impl->is8Bit())
         StringImpl::copyCharacters(data, characters8(), strLength);
     else
         StringImpl::copyCharacters(data, characters16(), strLength);
     StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
     m_impl = WTFMove(newImpl);
 }


 // Returns StringImpl::characterStartingAt(i), or 0 when the string is null
 // or `i` is not a valid index.
 UChar32 String::characterStartingAt(unsigned i) const
 {
     if (m_impl && i < m_impl->length())
         return m_impl->characterStartingAt(i);

     return 0;
 }

 // Replaces the string with its substring(0, position); a null string is left untouched.
 void String::truncate(unsigned position)
 {
     if (!m_impl)
         return;

     m_impl = m_impl->substring(0, position);
 }

 // Rebuilds the string without the `lengthToRemove` characters starting at `position`.
 // `characters` is the current character data in the matching width; the caller is
 // expected to pass a range that lies inside the string (see String::remove).
 template<typename CharacterType> inline void String::removeInternal(const CharacterType* characters, unsigned position, unsigned lengthToRemove)
 {
     const unsigned remainingLength = length() - lengthToRemove;

     CharacterType* buffer;
     auto shortened = StringImpl::createUninitialized(remainingLength, buffer);

     // Part before the removed range.
     StringImpl::copyCharacters(buffer, characters, position);

     // Part after the removed range.
     const CharacterType* tail = characters + position + lengthToRemove;
     StringImpl::copyCharacters(buffer + position, tail, remainingLength - position);

     m_impl = WTFMove(shortened);
 }

 // Removes up to `lengthToRemove` characters starting at `position`.
 // Does nothing for a zero length or a position at/after the end; the removed
 // range is clamped so that it never extends past the end of the string.
 void String::remove(unsigned position, unsigned lengthToRemove)
 {
     if (!lengthToRemove)
         return;

     const auto currentLength = this->length();
     if (position >= currentLength)
         return;

     const unsigned clampedLength = std::min(lengthToRemove, currentLength - position);
     if (!is8Bit()) {
         removeInternal(characters16(), position, clampedLength);
         return;
     }
     removeInternal(characters8(), position, clampedLength);
 }

 // Returns StringImpl::substring(position, length), or a null String for a null string.
 String String::substring(unsigned position, unsigned length) const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->substring(position, length);
 }

 // Returns a substring created with StringImpl::createSubstringSharingImpl().
 // `offset` and `length` are clamped to the string; a request that covers the
 // whole string returns *this unchanged.
 String String::substringSharingImpl(unsigned offset, unsigned length) const
 {
     // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).

     const unsigned stringLength = this->length();
     const unsigned clampedOffset = std::min(offset, stringLength);
     const unsigned clampedLength = std::min(length, stringLength - clampedOffset);

     const bool coversWholeString = !clampedOffset && clampedLength == stringLength;
     if (coversWholeString)
         return *this;

     return StringImpl::createSubstringSharingImpl(*m_impl, clampedOffset, clampedLength);
 }

 // Forwards to StringImpl::convertToASCIILowercase(); a null string yields a null String.
 String String::convertToASCIILowercase() const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->convertToASCIILowercase();
 }

 // Forwards to StringImpl::convertToASCIIUppercase(); a null string yields a null String.
 String String::convertToASCIIUppercase() const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->convertToASCIIUppercase();
 }

 // Forwards to StringImpl::convertToLowercaseWithoutLocale(); a null string yields a null String.
 String String::convertToLowercaseWithoutLocale() const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->convertToLowercaseWithoutLocale();
 }

 // Forwards `failingIndex` to StringImpl::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit();
 // a null string yields a null String.
 String String::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(failingIndex);
 }

 // Forwards to StringImpl::convertToUppercaseWithoutLocale(); a null string yields a null String.
 String String::convertToUppercaseWithoutLocale() const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->convertToUppercaseWithoutLocale();
 }

 // Forwards `localeIdentifier` to StringImpl::convertToLowercaseWithLocale();
 // a null string yields a null String.
 String String::convertToLowercaseWithLocale(const AtomicString& localeIdentifier) const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->convertToLowercaseWithLocale(localeIdentifier);
 }

 // Forwards `localeIdentifier` to StringImpl::convertToUppercaseWithLocale();
 // a null string yields a null String.
 String String::convertToUppercaseWithLocale(const AtomicString& localeIdentifier) const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->convertToUppercaseWithLocale(localeIdentifier);
 }

 // Forwards to StringImpl::stripWhiteSpace(); a null string yields a null String.
 String String::stripWhiteSpace() const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     // FIXME: This function needs a new name. For one thing, "whitespace" is a single
     // word so the "s" should be lowercase. For another, it's not clear from this name
     // that the function uses the Unicode definition of whitespace. Most WebKit callers
     // don't want that and eventually we should consider deleting this.
     if (!m_impl)
         return String { };

     return m_impl->stripWhiteSpace();
 }

 // Forwards `predicate` to StringImpl::stripLeadingAndTrailingCharacters();
 // a null string yields a null String.
 String String::stripLeadingAndTrailingCharacters(CodeUnitMatchFunction predicate) const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->stripLeadingAndTrailingCharacters(predicate);
 }

 // Forwards to StringImpl::simplifyWhiteSpace(); a null string yields a null String.
 String String::simplifyWhiteSpace() const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     // FIXME: This function needs a new name. For one thing, "whitespace" is a single
     // word so the "s" should be lowercase. For another, it's not clear from this name
     // that the function uses the Unicode definition of whitespace. Most WebKit callers
     // don't want that and eventually we should consider deleting this.
     if (!m_impl)
         return String { };

     return m_impl->simplifyWhiteSpace();
 }

 // Forwards the caller-supplied `isWhiteSpace` predicate to StringImpl::simplifyWhiteSpace();
 // a null string yields a null String.
 String String::simplifyWhiteSpace(CodeUnitMatchFunction isWhiteSpace) const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->simplifyWhiteSpace(isWhiteSpace);
 }

 // Forwards `findMatch` to StringImpl::removeCharacters(); a null string yields a null String.
 String String::removeCharacters(CodeUnitMatchFunction findMatch) const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->removeCharacters(findMatch);
 }

 // Forwards to StringImpl::foldCase(); a null string yields a null String.
 String String::foldCase() const
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->foldCase();
 }

 // Returns false when the string is null, empty, or does not end in '%'.
 // Otherwise stores in `result` whatever charactersToIntStrict() returns for the
 // characters before the '%' and returns true; the parse's own success flag is
 // not consulted here.
 bool String::percentage(int& result) const
 {
     if (!m_impl)
         return false;

     const auto fullLength = m_impl->length();
     if (!fullLength)
         return false;

     const auto lastIndex = fullLength - 1;
     if ((*m_impl)[lastIndex] != '%')
         return false;

     if (!m_impl->is8Bit())
         result = charactersToIntStrict(m_impl->characters16(), lastIndex);
     else
         result = charactersToIntStrict(m_impl->characters8(), lastIndex);
     return true;
 }

 // Returns the string's code units as UChars followed by a single 0 terminator.
 // A null string produces an empty vector with no terminator.
 Vector<UChar> String::charactersWithNullTermination() const
 {
     Vector<UChar> result;

     if (!m_impl)
         return result;

     result.reserveInitialCapacity(length() + 1);

     if (!is8Bit()) {
         const UChar* source = m_impl->characters16();
         result.append(source, m_impl->length());
     } else {
         // Widen each Latin-1 character; capacity was reserved above.
         const LChar* source = m_impl->characters8();
         for (size_t index = 0; index < length(); ++index)
             result.uncheckedAppend(source[index]);
     }

     result.append(0);
     return result;
 }

 // Formats `format` with the printf-style arguments in `args` and returns the result.
 //
 // - On CF (non-Windows) builds, a format containing "%@" is handed to
 //   CFStringCreateWithFormatAndArguments instead of vsnprintf.
 // - Otherwise the format is run twice: once to measure, once into a buffer of
 //   exactly the measured size. A measured length of 0 returns emptyString();
 //   a negative (error) result returns a null String.
 //
 // `args` belongs to the caller: it is consumed here but never va_end'ed here,
 // because va_end has to be issued by the function that called va_start.
 // The private copy taken for the second pass is released on every path.
 WTF_ATTRIBUTE_PRINTF(1, 0) static String createWithFormatAndArguments(const char *format, va_list args)
 {
 #if COMPILER(CLANG)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wformat-nonliteral"
 #endif

 #if USE(CF) && !OS(WINDOWS)
     if (strstr(format, "%@")) {
         auto cfFormat = adoptCF(CFStringCreateWithCString(kCFAllocatorDefault, format, kCFStringEncodingUTF8));
         auto result = adoptCF(CFStringCreateWithFormatAndArguments(kCFAllocatorDefault, nullptr, cfFormat.get(), args));
         return result.get();
     }
 #endif

     // The arguments are walked twice below, so copy them before the first
     // pass consumes `args`.
     va_list argsCopy;
     va_copy(argsCopy, args);

     // Do the format once to get the length.
 #if COMPILER(MSVC)
     int result = _vscprintf(format, args);
 #else
     char ch;
     int result = vsnprintf(&ch, 1, format, args);
 #endif

     if (result <= 0) {
         // Nothing to format a second time; release the copy before leaving.
         va_end(argsCopy);
         if (!result)
             return emptyString();
         return String();
     }

     Vector<char, 256> buffer;
     unsigned len = result;
     buffer.grow(len + 1);

     // Now do the formatting again, guaranteed to fit.
     vsnprintf(buffer.data(), buffer.size(), format, argsCopy);
     va_end(argsCopy);

 #if COMPILER(CLANG)
 #pragma clang diagnostic pop
 #endif

     return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len);
 }

 // Formats `format` with an already-started va_list by delegating to
 // createWithFormatAndArguments().
 String String::formatWithArguments(const char *format, va_list args)
 {
     return createWithFormatAndArguments(format, args);
 }

 // printf-style factory: collects the variadic arguments into a va_list and
 // delegates the formatting to createWithFormatAndArguments().
 String String::format(const char *format, ...)
 {
     va_list arguments;
     va_start(arguments, format);
     String formatted = createWithFormatAndArguments(format, arguments);
     va_end(arguments);
     return formatted;
 }

 // Converts an int to a String by delegating to numberToStringSigned<String>().
 String String::number(int number)
 {
     return numberToStringSigned<String>(number);
 }

 // Converts an unsigned int to a String by delegating to numberToStringUnsigned<String>().
 String String::number(unsigned int number)
 {
     return numberToStringUnsigned<String>(number);
 }

 // Converts a long to a String by delegating to numberToStringSigned<String>().
 String String::number(long number)
 {
     return numberToStringSigned<String>(number);
 }

 // Converts an unsigned long to a String by delegating to numberToStringUnsigned<String>().
 String String::number(unsigned long number)
 {
     return numberToStringUnsigned<String>(number);
 }

 // Converts a long long to a String by delegating to numberToStringSigned<String>().
 String String::number(long long number)
 {
     return numberToStringSigned<String>(number);
 }

 // Converts an unsigned long long to a String by delegating to numberToStringUnsigned<String>().
 String String::number(unsigned long long number)
 {
     return numberToStringUnsigned<String>(number);
 }

 // Converts a double to a String via numberToFixedPrecisionString(), formatting into a
 // stack NumberToStringBuffer. The policy enum is reduced to the boolean that helper takes.
 String String::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
 {
     const bool shouldTruncateTrailingZeros = trailingZerosTruncatingPolicy == TruncateTrailingZeros;

     NumberToStringBuffer buffer;
     return String(numberToFixedPrecisionString(number, precision, buffer, shouldTruncateTrailingZeros));
 }

 // Converts a double to a String via numberToString(), formatting into a
 // stack NumberToStringBuffer that is copied into the returned String.
 String String::numberToStringECMAScript(double number)
 {
     NumberToStringBuffer buffer;
     return String(numberToString(number, buffer));
 }

 // Converts a double to a String via numberToFixedWidthString(number, decimalPlaces, ...),
 // formatting into a stack NumberToStringBuffer that is copied into the returned String.
 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces)
 {
     NumberToStringBuffer buffer;
     return String(numberToFixedWidthString(number, decimalPlaces, buffer));
 }

 // Forwards to StringImpl::toIntStrict(ok, base).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 int String::toIntStrict(bool* ok, int base) const
 {
     if (m_impl)
         return m_impl->toIntStrict(ok, base);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toUIntStrict(ok, base).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 unsigned String::toUIntStrict(bool* ok, int base) const
 {
     if (m_impl)
         return m_impl->toUIntStrict(ok, base);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toInt64Strict(ok, base).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 int64_t String::toInt64Strict(bool* ok, int base) const
 {
     if (m_impl)
         return m_impl->toInt64Strict(ok, base);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toUInt64Strict(ok, base).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 uint64_t String::toUInt64Strict(bool* ok, int base) const
 {
     if (m_impl)
         return m_impl->toUInt64Strict(ok, base);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toIntPtrStrict(ok, base).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 intptr_t String::toIntPtrStrict(bool* ok, int base) const
 {
     if (m_impl)
         return m_impl->toIntPtrStrict(ok, base);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toInt(ok).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 int String::toInt(bool* ok) const
 {
     if (m_impl)
         return m_impl->toInt(ok);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toUInt(ok).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 unsigned String::toUInt(bool* ok) const
 {
     if (m_impl)
         return m_impl->toUInt(ok);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toInt64(ok).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 int64_t String::toInt64(bool* ok) const
 {
     if (m_impl)
         return m_impl->toInt64(ok);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toUInt64(ok).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 uint64_t String::toUInt64(bool* ok) const
 {
     if (m_impl)
         return m_impl->toUInt64(ok);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toIntPtr(ok).
 // A null string returns 0 and, when `ok` is non-null, sets *ok to false.
 intptr_t String::toIntPtr(bool* ok) const
 {
     if (m_impl)
         return m_impl->toIntPtr(ok);

     if (ok)
         *ok = false;
     return 0;
 }

 // Forwards to StringImpl::toDouble(ok).
 // A null string returns 0.0 and, when `ok` is non-null, sets *ok to false.
 double String::toDouble(bool* ok) const
 {
     if (m_impl)
         return m_impl->toDouble(ok);

     if (ok)
         *ok = false;
     return 0.0;
 }

 // Forwards to StringImpl::toFloat(ok).
 // A null string returns 0.0f and, when `ok` is non-null, sets *ok to false.
 float String::toFloat(bool* ok) const
 {
     if (m_impl)
         return m_impl->toFloat(ok);

     if (ok)
         *ok = false;
     return 0.0f;
 }

 // Lvalue overload: forwards to StringImpl::isolatedCopy(); a null string yields a null String.
 String String::isolatedCopy() const &
 {
     // FIXME: Should this function, and the many others like it, be inlined?
     if (!m_impl)
         return String { };

     return m_impl->isolatedCopy();
 }

 // Rvalue overload: when isSafeToSendToAnotherThread() holds, the temporary's
 // m_impl is moved into the result instead of being copied.
 String String::isolatedCopy() &&
 {
     if (!isSafeToSendToAnotherThread()) {
         // Not safe to hand over as-is: fall back to a real copy.
         if (!m_impl)
             return String { };
         return m_impl->isolatedCopy();
     }

     // Since we know that our string is a temporary that will be destroyed
     // we can just steal the m_impl from it, thus avoiding a copy.
     return { WTFMove(*this) };
 }

 // True for an empty string, or for a string whose impl has exactly one reference
 // and is not atomic.
 bool String::isSafeToSendToAnotherThread() const
 {
     if (isEmpty())
         return true;

     if (!m_impl->hasOneRef())
         return false;

     // AtomicStrings are not safe to send between threads as ~StringImpl()
     // will try to remove them from the wrong AtomicStringTable.
     return !m_impl->isAtomic();
 }

 // Splits the string on every occurrence of `separator`, replacing the contents
 // of `result` with the pieces. Empty pieces are kept only when
 // `allowEmptyEntries` is true.
 //
 // A zero-length separator is treated as "never found": the whole string becomes
 // the single entry (subject to `allowEmptyEntries`). Without that guard the scan
 // position would not advance past a match (startPos = endPos + 0), so any match
 // reported at startPos would repeat forever.
 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
 {
     result.clear();

     const unsigned separatorLength = separator.length();

     unsigned startPos = 0;
     size_t endPos;
     while (separatorLength && (endPos = find(separator, startPos)) != notFound) {
         if (allowEmptyEntries || startPos != endPos)
             result.append(substring(startPos, endPos - startPos));
         startPos = endPos + separatorLength;
     }

     // Trailing piece after the last separator (or the whole string if none matched).
     if (allowEmptyEntries || startPos != length())
         result.append(substring(startPos));
 }

 // Splits the string on `separator` and invokes `functor` once per piece with a
 // StringView into this string. Empty pieces are reported only when
 // `allowEmptyEntries` is true.
 void String::split(UChar separator, bool allowEmptyEntries, const SplitFunctor& functor) const
 {
     StringView view(*this);

     unsigned pieceStart = 0;
     while (true) {
         const size_t separatorIndex = find(separator, pieceStart);
         if (separatorIndex == notFound)
             break;

         if (allowEmptyEntries || pieceStart != separatorIndex)
             functor(view.substring(pieceStart, separatorIndex - pieceStart));
         pieceStart = separatorIndex + 1;
     }

     // Trailing piece after the last separator (or the whole string if none matched).
     if (allowEmptyEntries || pieceStart != length())
         functor(view.substring(pieceStart));
 }

 // Splits the string on `separator` into `result`, replacing its previous contents.
 // Implemented on top of the functor-based overload: each StringView piece is
 // converted to an owning String before being appended.
 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
 {
     result.clear();
     split(separator, allowEmptyEntries, [&result](StringView item) {
         result.append(item.toString());
     });
 }

 // Returns the string as a CString in which code units 0x20..0x7f and the null
 // character are preserved and every other code unit is replaced by '?'.
 CString String::ascii() const
 {
     const unsigned length = this->length();

     char* characterBuffer;
     if (!length)
         return CString::newUninitialized(length, characterBuffer);

     if (!this->is8Bit()) {
         const UChar* source = this->characters16();
         CString result = CString::newUninitialized(length, characterBuffer);

         for (unsigned index = 0; index < length; ++index) {
             const UChar ch = source[index];
             const bool needsReplacement = ch && (ch < 0x20 || ch > 0x7f);
             characterBuffer[index] = needsReplacement ? '?' : ch;
         }

         return result;
     }

     const LChar* source = this->characters8();
     CString result = CString::newUninitialized(length, characterBuffer);

     for (unsigned index = 0; index < length; ++index) {
         const LChar ch = source[index];
         const bool needsReplacement = ch && (ch < 0x20 || ch > 0x7f);
         characterBuffer[index] = needsReplacement ? '?' : ch;
     }

     return result;
 }

 // Basic Latin1 (ISO) encoding - code units 0..255 are preserved,
 // code units outside of this range are converted to '?'.
 // An empty (or null) string yields an empty CString.
 CString String::latin1() const
 {
     const unsigned length = this->length();
     if (!length)
         return CString("", 0);

     // 8-bit data is already Latin-1 and can be copied as is.
     if (is8Bit())
         return CString(reinterpret_cast<const char*>(this->characters8()), length);

     const UChar* source = this->characters16();

     char* characterBuffer;
     CString result = CString::newUninitialized(length, characterBuffer);

     for (unsigned index = 0; index < length; ++index) {
         const UChar codeUnit = source[index];
         if (codeUnit > 0xff)
             characterBuffer[index] = '?';
         else
             characterBuffer[index] = static_cast<char>(codeUnit);
     }

     return result;
 }

 // Forwards to StringImpl::utf8(mode); a null string yields an empty CString.
 CString String::utf8(ConversionMode mode) const
 {
     if (!m_impl)
         return CString { "", 0 };

     return m_impl->utf8(mode);
 }

 // Convenience overload: UTF-8 conversion using LenientConversion mode.
 CString String::utf8() const
 {
     return utf8(LenientConversion);
 }

 // Builds an 8-bit String of `length` characters from 16-bit `source` using
 // copyLCharsFromUCharSource(). A zero length returns a null String.
 // NOTE(review): presumably every source code unit is expected to fit in an LChar — confirm with callers.
 String String::make8BitFrom16BitSource(const UChar* source, size_t length)
 {
     if (length) {
         LChar* narrowBuffer;
         String narrowed = String::createUninitialized(length, narrowBuffer);
         copyLCharsFromUCharSource(narrowBuffer, source, length);
         return narrowed;
     }

     return String();
 }

 // Builds a 16-bit String of `length` characters from 8-bit `source` using
 // StringImpl::copyCharacters(). A zero length returns a null String.
 String String::make16BitFrom8BitSource(const LChar* source, size_t length)
 {
     if (length) {
         UChar* wideBuffer;
         String widened = String::createUninitialized(length, wideBuffer);
         StringImpl::copyCharacters(wideBuffer, source, length);
         return widened;
     }

     return String();
 }

 // Decodes `length` bytes of UTF-8 starting at `stringStart`.
 // - Crashes if `length` does not fit in an unsigned.
 // - Null input returns a null String; zero length returns emptyString().
 // - All-ASCII input is used directly as 8-bit data.
 // - Otherwise the bytes are converted to UTF-16; a conversion result other than
 //   conversionOK returns a null String.
 String String::fromUTF8(const LChar* stringStart, size_t length)
 {
     if (length > std::numeric_limits<unsigned>::max())
         CRASH();

     if (!stringStart)
         return String();

     if (!length)
         return emptyString();

     if (charactersAreAllASCII(stringStart, length))
         return StringImpl::create(stringStart, length);

     // One UTF-16 code unit per input byte is the capacity given to the converter.
     Vector<UChar, 1024> buffer(length);
     UChar* const outputStart = buffer.data();
     UChar* outputCursor = outputStart;
     UChar* const outputEnd = outputStart + buffer.size();

     const char* sourceCursor = reinterpret_cast<const char*>(stringStart);
     const char* const sourceEnd = reinterpret_cast<const char *>(stringStart + length);

     if (convertUTF8ToUTF16(&sourceCursor, sourceEnd, &outputCursor, outputEnd) != conversionOK)
         return String();

     unsigned utf16Length = outputCursor - outputStart;
     ASSERT_WITH_SECURITY_IMPLICATION(utf16Length < length);
     return StringImpl::create(outputStart, utf16Length);
 }

 // Decodes a null-terminated UTF-8 buffer; the byte length is measured with strlen().
 // A null pointer returns a null String.
 String String::fromUTF8(const LChar* string)
 {
     if (string) {
         const size_t byteLength = strlen(reinterpret_cast<const char*>(string));
         return fromUTF8(string, byteLength);
     }

     return String();
 }

 // Decodes the UTF-8 bytes held by a CString.
 // NOTE(review): only s.data() is forwarded, not the CString's stored length —
 // presumably the callee measures up to the first null byte; confirm.
 String String::fromUTF8(const CString& s)
 {
     return fromUTF8(s.data());
 }

 // Tries to decode `size` bytes as UTF-8; when fromUTF8() yields a String for
 // which operator! is true, the same bytes are used as Latin-1 data instead.
 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size)
 {
     String decoded = fromUTF8(string, size);

     // UTF-8 decoding produced nothing usable: reinterpret the bytes as Latin-1.
     if (!decoded)
         return String(string, size);

     return decoded;
 }

 // String Operations

 // Returns true when `c` is a valid digit in `base`: '0'..'9' for values below
 // the base, then letters of either case for values from 10 upwards. Bases above
 // 36 accept no more letters than base 36 does. Non-ASCII code units never qualify.
 static bool isCharacterAllowedInBase(UChar c, int base)
 {
     if (c > 0x7F)
         return false;

     if (isASCIIDigit(c))
         return c - '0' < base;

     if (!isASCIIAlpha(c))
         return false;

     const int effectiveBase = base > 36 ? 36 : base;
     const bool isLowercaseDigit = c >= 'a' && c < 'a' + effectiveBase - 10;
     const bool isUppercaseDigit = c >= 'A' && c < 'A' + effectiveBase - 10;
     return isLowercaseDigit || isUppercaseDigit;
 }

 // Parses the whole range [data, data + length) as an integer of IntegralType in `base`.
 //
 // Accepted shape: optional leading whitespace, optional sign, one or more digits
 // valid in `base`, optional trailing whitespace. A '-' is only accepted for signed
 // types; a '+' is accepted for all types.
 //
 // On success returns the value and, when `ok` is non-null, sets *ok to true.
 // On any failure (null data, no digits, overflow, leftover characters) returns 0
 // and sets *ok to false.
 //
 // NOTE(review): `base` is used as a divisor below; callers presumably never pass 0.
 template<typename IntegralType, typename CharacterType>
 static inline IntegralType toIntegralType(const CharacterType* data, size_t length, bool* ok, int base)
 {
     static const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
     static const bool isSigned = std::numeric_limits<IntegralType>::is_signed;
     // Largest accumulated value that can still be multiplied by `base` without exceeding integralMax.
     const IntegralType maxMultiplier = integralMax / base;

     IntegralType value = 0;
     bool isOk = false;
     bool isNegative = false;

     if (!data)
         goto bye;

     // skip leading whitespace
     while (length && isSpaceOrNewline(*data)) {
         --length;
         ++data;
     }

     // Consume at most one sign character.
     if (isSigned && length && *data == '-') {
         --length;
         ++data;
         isNegative = true;
     } else if (length && *data == '+') {
         --length;
         ++data;
     }

     // At least one digit is required.
     if (!length || !isCharacterAllowedInBase(*data, base))
         goto bye;

     while (length && isCharacterAllowedInBase(*data, base)) {
         --length;
         IntegralType digitValue;
         auto c = *data;
         if (isASCIIDigit(c))
             digitValue = c - '0';
         else if (c >= 'a')
             digitValue = c - 'a' + 10;
         else
             digitValue = c - 'A' + 10;

         // Overflow guard. The "+ isNegative" term admits a magnitude one larger
         // than integralMax for negative inputs.
         if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
             goto bye;

         // NOTE(review): for a negative input whose magnitude is integralMax + 1, this
         // sum exceeds integralMax before the negation below — confirm the build
         // relies on wrapping behavior here.
         value = base * value + digitValue;
         ++data;
     }

 #if COMPILER(MSVC)
 #pragma warning(push, 0)
 #pragma warning(disable:4146)
 #endif

     if (isNegative)
         value = -value;

 #if COMPILER(MSVC)
 #pragma warning(pop)
 #endif

     // skip trailing space
     while (length && isSpaceOrNewline(*data)) {
         --length;
         ++data;
     }

     // Anything left over after the trailing whitespace makes the parse fail.
     if (!length)
         isOk = true;
 bye:
     if (ok)
         *ok = isOk;
     return isOk ? value : 0;
 }

 // Returns the length of the longest prefix of [data, data + length) made of
 // optional leading whitespace, an optional '+' or '-', and ASCII digits.
 template<typename CharacterType>
 static unsigned lengthOfCharactersAsInteger(const CharacterType* data, size_t length)
 {
     size_t index = 0;

     // Allow leading spaces.
     while (index != length && isSpaceOrNewline(data[index]))
         ++index;

     // Allow sign.
     if (index != length && (data[index] == '+' || data[index] == '-'))
         ++index;

     // Allow digits.
     while (index != length && isASCIIDigit(data[index]))
         ++index;

     return index;
 }

 // Parses all of the 8-bit range as an int in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<int, LChar>(data, length, ok, base);
 }

 // Parses all of the 16-bit range as an int in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<int, UChar>(data, length, ok, base);
 }

 // Parses all of the 8-bit range as an unsigned in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<unsigned, LChar>(data, length, ok, base);
 }

 // Parses all of the 16-bit range as an unsigned in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<unsigned, UChar>(data, length, ok, base);
 }

 // Parses all of the 8-bit range as an int64_t in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<int64_t, LChar>(data, length, ok, base);
 }

 // Parses all of the 16-bit range as an int64_t in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<int64_t, UChar>(data, length, ok, base);
 }

 // Parses all of the 8-bit range as a uint64_t in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<uint64_t, LChar>(data, length, ok, base);
 }

 // Parses all of the 16-bit range as a uint64_t in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<uint64_t, UChar>(data, length, ok, base);
 }

 // Parses all of the 8-bit range as an intptr_t in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<intptr_t, LChar>(data, length, ok, base);
 }

 // Parses all of the 16-bit range as an intptr_t in `base` via toIntegralType;
 // `ok`, when non-null, receives whether the parse succeeded (0 is returned on failure).
 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
 {
     return toIntegralType<intptr_t, UChar>(data, length, ok, base);
 }

 // Lenient base-10 parse of 8-bit data as an int: only the prefix measured by
 // lengthOfCharactersAsInteger (whitespace, optional sign, digits) is handed to
 // toIntegralType, so characters after that prefix are ignored.
 int charactersToInt(const LChar* data, size_t length, bool* ok)
 {
     return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
 }

 // Lenient base-10 parse of 16-bit data as an int: only the prefix measured by
 // lengthOfCharactersAsInteger (whitespace, optional sign, digits) is handed to
 // toIntegralType, so characters after that prefix are ignored.
 int charactersToInt(const UChar* data, size_t length, bool* ok)
 {
     return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
 }

 // Lenient base-10 parse of 8-bit data as an unsigned: only the prefix measured by
 // lengthOfCharactersAsInteger (whitespace, optional sign, digits) is handed to
 // toIntegralType, so characters after that prefix are ignored.
 unsigned charactersToUInt(const LChar* data, size_t length, bool* ok)
 {
     return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
 }

 // Lenient base-10 parse of 16-bit data as an unsigned: only the prefix measured by
 // lengthOfCharactersAsInteger (whitespace, optional sign, digits) is handed to
 // toIntegralType, so characters after that prefix are ignored.
 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
 {
     return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
 }

 // Lenient base-10 parse of 8-bit data as an int64_t: only the prefix measured by
 // lengthOfCharactersAsInteger (whitespace, optional sign, digits) is handed to
 // toIntegralType, so characters after that prefix are ignored.
 int64_t charactersToInt64(const LChar* data, size_t length, bool* ok)
 {
     return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
 }

 // Non-strict UTF-16 -> int64_t conversion, always base 10. Only the first
 // lengthOfCharactersAsInteger(data, length) code units are given to
 // toIntegralType; `ok` is passed through untouched.
 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
 {
     const size_t integerLength = lengthOfCharactersAsInteger<UChar>(data, length);
     return toIntegralType<int64_t, UChar>(data, integerLength, ok, 10);
 }

 // Non-strict Latin-1 -> uint64_t conversion, always base 10. Only the first
 // lengthOfCharactersAsInteger(data, length) characters are given to
 // toIntegralType; `ok` is passed through untouched.
 uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok)
 {
     const size_t integerLength = lengthOfCharactersAsInteger<LChar>(data, length);
     return toIntegralType<uint64_t, LChar>(data, integerLength, ok, 10);
 }

 // Non-strict UTF-16 -> uint64_t conversion, always base 10. Only the first
 // lengthOfCharactersAsInteger(data, length) code units are given to
 // toIntegralType; `ok` is passed through untouched.
 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
 {
     const size_t integerLength = lengthOfCharactersAsInteger<UChar>(data, length);
     return toIntegralType<uint64_t, UChar>(data, integerLength, ok, 10);
 }

 // Non-strict Latin-1 -> intptr_t conversion, always base 10. Only the first
 // lengthOfCharactersAsInteger(data, length) characters are given to
 // toIntegralType; `ok` is passed through untouched.
 intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok)
 {
     const size_t integerLength = lengthOfCharactersAsInteger<LChar>(data, length);
     return toIntegralType<intptr_t, LChar>(data, integerLength, ok, 10);
 }

 // Non-strict UTF-16 -> intptr_t conversion, always base 10. Only the first
 // lengthOfCharactersAsInteger(data, length) code units are given to
 // toIntegralType; `ok` is passed through untouched.
 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
 {
     const size_t integerLength = lengthOfCharactersAsInteger<UChar>(data, length);
     return toIntegralType<intptr_t, UChar>(data, integerLength, ok, 10);
 }

 // Compile-time switch for toDoubleType: decides whether characters left over
 // after the parsed number still count as a successful conversion.
 enum TrailingJunkPolicy {
     DisallowTrailingJunk, // success requires the whole input to be consumed
     AllowTrailingJunk // success as soon as a number was parsed
 };

 // Shared implementation behind charactersToDouble / charactersToFloat.
 //
 // Skips leading ASCII whitespace, then lets parseDouble consume as much of the
 // remaining input as it can.
 //   - parsedLength: set by parseDouble; on success it is adjusted to also count
 //     the skipped whitespace, on failure it is left at 0.
 //   - ok (may be null): false when nothing was parsed; otherwise true if the
 //     policy is AllowTrailingJunk or the whole input was consumed.
 // Returns the parsed value, or 0.0 when nothing could be parsed.
 template<typename CharacterType, TrailingJunkPolicy policy>
 static inline double toDoubleType(const CharacterType* data, size_t length, bool* ok, size_t& parsedLength)
 {
     size_t numberStart = 0;
     for (; numberStart < length; ++numberStart) {
         if (!isASCIISpace(data[numberStart]))
             break;
     }

     const double value = parseDouble(data + numberStart, length - numberStart, parsedLength);
     if (parsedLength) {
         // Report the consumed length relative to the start of `data`.
         parsedLength += numberStart;
         if (ok)
             *ok = policy == AllowTrailingJunk || parsedLength == length;
         return value;
     }

     if (ok)
         *ok = false;
     return 0.0;
 }

 // Latin-1 -> double. Uses DisallowTrailingJunk, so `ok` (if non-null) is only
 // set to true when toDoubleType consumed the whole input.
 double charactersToDouble(const LChar* data, size_t length, bool* ok)
 {
     size_t consumedLength; // Required by toDoubleType; the value is not used here.
     const double result = toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, consumedLength);
     return result;
 }

 // UTF-16 -> double. Uses DisallowTrailingJunk, so `ok` (if non-null) is only
 // set to true when toDoubleType consumed the whole input.
 double charactersToDouble(const UChar* data, size_t length, bool* ok)
 {
     size_t consumedLength; // Required by toDoubleType; the value is not used here.
     const double result = toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, consumedLength);
     return result;
 }

 // Latin-1 -> float. Parses as a double with DisallowTrailingJunk and narrows
 // the result with static_cast.
 float charactersToFloat(const LChar* data, size_t length, bool* ok)
 {
     // FIXME: This will return ok even when the string fits into a double but not a float.
     size_t consumedLength; // Required by toDoubleType; the value is not used here.
     const double wideValue = toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, consumedLength);
     return static_cast<float>(wideValue);
 }

 // UTF-16 -> float. Parses as a double with DisallowTrailingJunk and narrows
 // the result with static_cast.
 float charactersToFloat(const UChar* data, size_t length, bool* ok)
 {
     // FIXME: This will return ok even when the string fits into a double but not a float.
     size_t consumedLength; // Required by toDoubleType; the value is not used here.
     const double wideValue = toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, consumedLength);
     return static_cast<float>(wideValue);
 }

 // Latin-1 -> float, tolerating trailing junk. Parses as a double with
 // AllowTrailingJunk and narrows the result with static_cast.
 //
 // This overload has no `ok` out-parameter: callers detect failure through
 // parsedLength, which toDoubleType leaves at 0 when nothing could be parsed and
 // otherwise sets to the number of characters consumed (leading whitespace
 // included).
 float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength)
 {
     // FIXME: A value that fits into a double but not a float is not reported as a
     // failure; parsedLength is still non-zero in that case.
     return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, length, nullptr, parsedLength));
 }

 // UTF-16 -> float, tolerating trailing junk. Parses as a double with
 // AllowTrailingJunk and narrows the result with static_cast.
 //
 // This overload has no `ok` out-parameter: callers detect failure through
 // parsedLength, which toDoubleType leaves at 0 when nothing could be parsed and
 // otherwise sets to the number of code units consumed (leading whitespace
 // included).
 float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength)
 {
     // FIXME: A value that fits into a double but not a float is not reported as a
     // failure; parsedLength is still non-zero in that case.
     return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, length, nullptr, parsedLength));
 }

 // Returns a reference to one shared String built from StringImpl::empty().
 // The instance is a function-local static held in a NeverDestroyed wrapper, so
 // it is created on the first call and the same object is returned afterwards.
 const String& emptyString()
 {
     static NeverDestroyed<String> sharedEmptyString(StringImpl::empty());
     return sharedEmptyString;
 }

 } // namespace WTF

 #ifndef NDEBUG

 // Helpers meant to be called by hand from a debugger. They are declared here
 // because they are only compiled when NDEBUG is not defined.

 // Heap-allocates a String from a C string (never freed).
 String* string(const char*);
 // Returns a NUL-terminated, ASCII-only rendering of the string's characters.
 Vector<char> asciiDebug(StringImpl*);
 Vector<char> asciiDebug(String&);

 void String::show() const
 {
     dataLogF("%s\n", asciiDebug(impl()).data());
 }

 // Debugger convenience: builds a String from a C string on the heap and hands
 // back the raw pointer.
 String* string(const char* s)
 {
     // Intentionally leaks memory!
     String* leakedString = new String(s);
     return leakedString;
 }

 // Renders a StringImpl as a NUL-terminated, ASCII-only character buffer:
 //   - a null impl is rendered as the text "[null]";
 //   - printable ASCII characters are copied as-is, except that a backslash is
 //     doubled so it cannot be confused with an escape;
 //   - every other code unit becomes "\u" followed by 4 hex digits.
 Vector<char> asciiDebug(StringImpl* impl)
 {
     if (!impl)
         return asciiDebug(String(ASCIILiteral("[null]")).impl());

     Vector<char> escaped;
     const unsigned characterCount = impl->length();
     for (unsigned index = 0; index < characterCount; ++index) {
         UChar character = (*impl)[index];

         if (!isASCIIPrintable(character)) {
             escaped.append('\\');
             escaped.append('u');
             appendUnsignedAsHexFixedSize(character, escaped, 4);
             continue;
         }

         // A literal backslash is emitted twice.
         if (character == '\\')
             escaped.append(character);
         escaped.append(character);
     }
     escaped.append('\0');
     return escaped;
 }

 // Convenience overload: renders the String's underlying StringImpl (which may
 // be null) via asciiDebug(StringImpl*).
 Vector<char> asciiDebug(String& string)
 {
     StringImpl* underlyingImpl = string.impl();
     return asciiDebug(underlyingImpl);
 }

 #endif