| /* |
| * (C) 1999 Lars Knoll (knoll@kde.org) |
| * Copyright (C) 2004-2017 Apple Inc. All rights reserved. |
| * Copyright (C) 2007-2009 Torch Mobile, Inc. |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| */ |
| |
| #include "config.h" |
| #include "WTFString.h" |
| |
| #include "IntegerToStringConversion.h" |
| #include <stdarg.h> |
| #include <wtf/ASCIICType.h> |
| #include <wtf/DataLog.h> |
| #include <wtf/HexNumber.h> |
| #include <wtf/MathExtras.h> |
| #include <wtf/NeverDestroyed.h> |
| #include <wtf/text/CString.h> |
| #include <wtf/Vector.h> |
| #include <wtf/dtoa.h> |
| #include <wtf/unicode/CharacterNames.h> |
| #include <wtf/unicode/UTF8.h> |
| |
| namespace WTF { |
| |
| using namespace Unicode; |
| |
| // Construct a string with UTF-16 data. |
| String::String(const UChar* characters, unsigned length) |
| { |
| if (characters) |
| m_impl = StringImpl::create(characters, length); |
| } |
| |
| // Construct a string with UTF-16 data, from a null-terminated source. |
| String::String(const UChar* nullTerminatedString) |
| { |
| if (nullTerminatedString) |
| m_impl = StringImpl::create(nullTerminatedString, lengthOfNullTerminatedString(nullTerminatedString)); |
| } |
| |
| // Construct a string with latin1 data. |
| String::String(const LChar* characters, unsigned length) |
| { |
| if (characters) |
| m_impl = StringImpl::create(characters, length); |
| } |
| |
| String::String(const char* characters, unsigned length) |
| { |
| if (characters) |
| m_impl = StringImpl::create(reinterpret_cast<const LChar*>(characters), length); |
| } |
| |
| // Construct a string with Latin-1 data, from a null-terminated source. |
| String::String(const LChar* nullTerminatedString) |
| { |
| if (nullTerminatedString) |
| m_impl = StringImpl::create(nullTerminatedString); |
| } |
| |
| String::String(const char* nullTerminatedString) |
| { |
| if (nullTerminatedString) |
| m_impl = StringImpl::create(reinterpret_cast<const LChar*>(nullTerminatedString)); |
| } |
| |
| String::String(ASCIILiteral characters) |
| : m_impl(StringImpl::createFromLiteral(characters)) |
| { |
| } |
| |
| void String::append(const String& otherString) |
| { |
| // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
| |
| if (!m_impl) { |
| m_impl = otherString.m_impl; |
| return; |
| } |
| |
| if (otherString.isEmpty()) |
| return; |
| |
| auto length = m_impl->length(); |
| auto otherLength = otherString.m_impl->length(); |
| if (otherLength > std::numeric_limits<unsigned>::max() - length) |
| CRASH(); |
| |
| if (m_impl->is8Bit() && otherString.m_impl->is8Bit()) { |
| LChar* data; |
| auto newImpl = StringImpl::createUninitialized(length + otherLength, data); |
| StringImpl::copyCharacters(data, m_impl->characters8(), length); |
| StringImpl::copyCharacters(data + length, otherString.m_impl->characters8(), otherLength); |
| m_impl = WTFMove(newImpl); |
| return; |
| } |
| UChar* data; |
| auto newImpl = StringImpl::createUninitialized(length + otherLength, data); |
| StringView(*m_impl).getCharactersWithUpconvert(data); |
| StringView(*otherString.m_impl).getCharactersWithUpconvert(data + length); |
| m_impl = WTFMove(newImpl); |
| } |
| |
| void String::append(LChar character) |
| { |
| // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
| |
| if (!m_impl) { |
| m_impl = StringImpl::create(&character, 1); |
| return; |
| } |
| if (!is8Bit()) { |
| append(static_cast<UChar>(character)); |
| return; |
| } |
| if (m_impl->length() >= std::numeric_limits<unsigned>::max()) |
| CRASH(); |
| LChar* data; |
| auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); |
| StringImpl::copyCharacters(data, m_impl->characters8(), m_impl->length()); |
| data[m_impl->length()] = character; |
| m_impl = WTFMove(newImpl); |
| } |
| |
| void String::append(UChar character) |
| { |
| // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
| |
| if (!m_impl) { |
| m_impl = StringImpl::create(&character, 1); |
| return; |
| } |
| if (character <= 0xFF && is8Bit()) { |
| append(static_cast<LChar>(character)); |
| return; |
| } |
| if (m_impl->length() >= std::numeric_limits<unsigned>::max()) |
| CRASH(); |
| UChar* data; |
| auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); |
| StringView(*m_impl).getCharactersWithUpconvert(data); |
| data[m_impl->length()] = character; |
| m_impl = WTFMove(newImpl); |
| } |
| |
| int codePointCompare(const String& a, const String& b) |
| { |
| return codePointCompare(a.impl(), b.impl()); |
| } |
| |
| void String::insert(const String& string, unsigned position) |
| { |
| // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
| |
| unsigned lengthToInsert = string.length(); |
| |
| if (!lengthToInsert) { |
| if (string.isNull()) |
| return; |
| if (isNull()) |
| m_impl = string.impl(); |
| return; |
| } |
| |
| if (position >= length()) { |
| append(string); |
| return; |
| } |
| |
| if (lengthToInsert > std::numeric_limits<unsigned>::max() - length()) |
| CRASH(); |
| |
| if (is8Bit() && string.is8Bit()) { |
| LChar* data; |
| auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data); |
| StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data); |
| StringView(string).getCharactersWithUpconvert(data + position); |
| StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert); |
| m_impl = WTFMove(newString); |
| } else { |
| UChar* data; |
| auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data); |
| StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data); |
| StringView(string).getCharactersWithUpconvert(data + position); |
| StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert); |
| m_impl = WTFMove(newString); |
| } |
| } |
| |
| void String::append(const LChar* charactersToAppend, unsigned lengthToAppend) |
| { |
| // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
| |
| if (!m_impl) { |
| if (!charactersToAppend) |
| return; |
| m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
| return; |
| } |
| |
| if (!lengthToAppend) |
| return; |
| |
| ASSERT(charactersToAppend); |
| |
| unsigned strLength = m_impl->length(); |
| |
| if (m_impl->is8Bit()) { |
| if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength) |
| CRASH(); |
| LChar* data; |
| auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); |
| StringImpl::copyCharacters(data, m_impl->characters8(), strLength); |
| StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend); |
| m_impl = WTFMove(newImpl); |
| return; |
| } |
| |
| if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength) |
| CRASH(); |
| UChar* data; |
| auto newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data); |
| StringImpl::copyCharacters(data, m_impl->characters16(), strLength); |
| StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend); |
| m_impl = WTFMove(newImpl); |
| } |
| |
| void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) |
| { |
| // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
| |
| if (!m_impl) { |
| if (!charactersToAppend) |
| return; |
| m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
| return; |
| } |
| |
| if (!lengthToAppend) |
| return; |
| |
| unsigned strLength = m_impl->length(); |
| |
| ASSERT(charactersToAppend); |
| if (lengthToAppend > std::numeric_limits<unsigned>::max() - strLength) |
| CRASH(); |
| UChar* data; |
| auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); |
| if (m_impl->is8Bit()) |
| StringImpl::copyCharacters(data, characters8(), strLength); |
| else |
| StringImpl::copyCharacters(data, characters16(), strLength); |
| StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend); |
| m_impl = WTFMove(newImpl); |
| } |
| |
| |
| UChar32 String::characterStartingAt(unsigned i) const |
| { |
| if (!m_impl || i >= m_impl->length()) |
| return 0; |
| return m_impl->characterStartingAt(i); |
| } |
| |
| void String::truncate(unsigned position) |
| { |
| if (m_impl) |
| m_impl = m_impl->substring(0, position); |
| } |
| |
| template<typename CharacterType> inline void String::removeInternal(const CharacterType* characters, unsigned position, unsigned lengthToRemove) |
| { |
| CharacterType* data; |
| auto newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data); |
| StringImpl::copyCharacters(data, characters, position); |
| StringImpl::copyCharacters(data + position, characters + position + lengthToRemove, length() - lengthToRemove - position); |
| m_impl = WTFMove(newImpl); |
| } |
| |
| void String::remove(unsigned position, unsigned lengthToRemove) |
| { |
| if (!lengthToRemove) |
| return; |
| auto length = this->length(); |
| if (position >= length) |
| return; |
| lengthToRemove = std::min(lengthToRemove, length - position); |
| if (is8Bit()) |
| removeInternal(characters8(), position, lengthToRemove); |
| else |
| removeInternal(characters16(), position, lengthToRemove); |
| } |
| |
| String String::substring(unsigned position, unsigned length) const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->substring(position, length) : String { }; |
| } |
| |
| String String::substringSharingImpl(unsigned offset, unsigned length) const |
| { |
| // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar). |
| |
| unsigned stringLength = this->length(); |
| offset = std::min(offset, stringLength); |
| length = std::min(length, stringLength - offset); |
| |
| if (!offset && length == stringLength) |
| return *this; |
| return StringImpl::createSubstringSharingImpl(*m_impl, offset, length); |
| } |
| |
| String String::convertToASCIILowercase() const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->convertToASCIILowercase() : String { }; |
| } |
| |
| String String::convertToASCIIUppercase() const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->convertToASCIIUppercase() : String { }; |
| } |
| |
| String String::convertToLowercaseWithoutLocale() const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->convertToLowercaseWithoutLocale() : String { }; |
| } |
| |
| String String::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(failingIndex) : String { }; |
| } |
| |
| String String::convertToUppercaseWithoutLocale() const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->convertToUppercaseWithoutLocale() : String { }; |
| } |
| |
| String String::convertToLowercaseWithLocale(const AtomicString& localeIdentifier) const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->convertToLowercaseWithLocale(localeIdentifier) : String { }; |
| } |
| |
| String String::convertToUppercaseWithLocale(const AtomicString& localeIdentifier) const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->convertToUppercaseWithLocale(localeIdentifier) : String { }; |
| } |
| |
| String String::stripWhiteSpace() const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| // FIXME: This function needs a new name. For one thing, "whitespace" is a single |
| // word so the "s" should be lowercase. For another, it's not clear from this name |
| // that the function uses the Unicode definition of whitespace. Most WebKit callers |
| // don't want that and eventually we should consider deleting this. |
| return m_impl ? m_impl->stripWhiteSpace() : String { }; |
| } |
| |
| String String::stripLeadingAndTrailingCharacters(CodeUnitMatchFunction predicate) const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->stripLeadingAndTrailingCharacters(predicate) : String { }; |
| } |
| |
| String String::simplifyWhiteSpace() const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| // FIXME: This function needs a new name. For one thing, "whitespace" is a single |
| // word so the "s" should be lowercase. For another, it's not clear from this name |
| // that the function uses the Unicode definition of whitespace. Most WebKit callers |
| // don't want that and eventually we should consider deleting this. |
| return m_impl ? m_impl->simplifyWhiteSpace() : String { }; |
| } |
| |
| String String::simplifyWhiteSpace(CodeUnitMatchFunction isWhiteSpace) const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->simplifyWhiteSpace(isWhiteSpace) : String { }; |
| } |
| |
| String String::removeCharacters(CodeUnitMatchFunction findMatch) const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->removeCharacters(findMatch) : String { }; |
| } |
| |
| String String::foldCase() const |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->foldCase() : String { }; |
| } |
| |
| bool String::percentage(int& result) const |
| { |
| if (!m_impl || !m_impl->length()) |
| return false; |
| |
| if ((*m_impl)[m_impl->length() - 1] != '%') |
| return false; |
| |
| if (m_impl->is8Bit()) |
| result = charactersToIntStrict(m_impl->characters8(), m_impl->length() - 1); |
| else |
| result = charactersToIntStrict(m_impl->characters16(), m_impl->length() - 1); |
| return true; |
| } |
| |
| Vector<UChar> String::charactersWithNullTermination() const |
| { |
| Vector<UChar> result; |
| |
| if (m_impl) { |
| result.reserveInitialCapacity(length() + 1); |
| |
| if (is8Bit()) { |
| const LChar* characters8 = m_impl->characters8(); |
| for (size_t i = 0; i < length(); ++i) |
| result.uncheckedAppend(characters8[i]); |
| } else { |
| const UChar* characters16 = m_impl->characters16(); |
| result.append(characters16, m_impl->length()); |
| } |
| |
| result.append(0); |
| } |
| |
| return result; |
| } |
| |
| WTF_ATTRIBUTE_PRINTF(1, 0) static String createWithFormatAndArguments(const char *format, va_list args) |
| { |
| va_list argsCopy; |
| va_copy(argsCopy, args); |
| |
| #if COMPILER(CLANG) |
| #pragma clang diagnostic push |
| #pragma clang diagnostic ignored "-Wformat-nonliteral" |
| #endif |
| |
| #if USE(CF) && !OS(WINDOWS) |
| if (strstr(format, "%@")) { |
| auto cfFormat = adoptCF(CFStringCreateWithCString(kCFAllocatorDefault, format, kCFStringEncodingUTF8)); |
| auto result = adoptCF(CFStringCreateWithFormatAndArguments(kCFAllocatorDefault, nullptr, cfFormat.get(), args)); |
| va_end(args); |
| return result.get(); |
| } |
| #endif |
| |
| // Do the format once to get the length. |
| #if COMPILER(MSVC) |
| int result = _vscprintf(format, args); |
| #else |
| char ch; |
| int result = vsnprintf(&ch, 1, format, args); |
| #endif |
| va_end(args); |
| |
| if (result == 0) |
| return emptyString(); |
| if (result < 0) |
| return String(); |
| |
| Vector<char, 256> buffer; |
| unsigned len = result; |
| buffer.grow(len + 1); |
| |
| // Now do the formatting again, guaranteed to fit. |
| vsnprintf(buffer.data(), buffer.size(), format, argsCopy); |
| va_end(argsCopy); |
| |
| #if COMPILER(CLANG) |
| #pragma clang diagnostic pop |
| #endif |
| |
| return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len); |
| } |
| |
| String String::formatWithArguments(const char *format, va_list args) |
| { |
| return createWithFormatAndArguments(format, args); |
| } |
| |
| String String::format(const char *format, ...) |
| { |
| va_list args; |
| va_start(args, format); |
| String result = createWithFormatAndArguments(format, args); |
| va_end(args); |
| return result; |
| } |
| |
| String String::number(int number) |
| { |
| return numberToStringSigned<String>(number); |
| } |
| |
| String String::number(unsigned int number) |
| { |
| return numberToStringUnsigned<String>(number); |
| } |
| |
| String String::number(long number) |
| { |
| return numberToStringSigned<String>(number); |
| } |
| |
| String String::number(unsigned long number) |
| { |
| return numberToStringUnsigned<String>(number); |
| } |
| |
| String String::number(long long number) |
| { |
| return numberToStringSigned<String>(number); |
| } |
| |
| String String::number(unsigned long long number) |
| { |
| return numberToStringUnsigned<String>(number); |
| } |
| |
| String String::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy) |
| { |
| NumberToStringBuffer buffer; |
| return String(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros)); |
| } |
| |
| String String::numberToStringECMAScript(double number) |
| { |
| NumberToStringBuffer buffer; |
| return String(numberToString(number, buffer)); |
| } |
| |
| String String::numberToStringFixedWidth(double number, unsigned decimalPlaces) |
| { |
| NumberToStringBuffer buffer; |
| return String(numberToFixedWidthString(number, decimalPlaces, buffer)); |
| } |
| |
| int String::toIntStrict(bool* ok, int base) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toIntStrict(ok, base); |
| } |
| |
| unsigned String::toUIntStrict(bool* ok, int base) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toUIntStrict(ok, base); |
| } |
| |
| int64_t String::toInt64Strict(bool* ok, int base) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toInt64Strict(ok, base); |
| } |
| |
| uint64_t String::toUInt64Strict(bool* ok, int base) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toUInt64Strict(ok, base); |
| } |
| |
| intptr_t String::toIntPtrStrict(bool* ok, int base) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toIntPtrStrict(ok, base); |
| } |
| |
| int String::toInt(bool* ok) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toInt(ok); |
| } |
| |
| unsigned String::toUInt(bool* ok) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toUInt(ok); |
| } |
| |
| int64_t String::toInt64(bool* ok) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toInt64(ok); |
| } |
| |
| uint64_t String::toUInt64(bool* ok) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toUInt64(ok); |
| } |
| |
| intptr_t String::toIntPtr(bool* ok) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0; |
| } |
| return m_impl->toIntPtr(ok); |
| } |
| |
| double String::toDouble(bool* ok) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0.0; |
| } |
| return m_impl->toDouble(ok); |
| } |
| |
| float String::toFloat(bool* ok) const |
| { |
| if (!m_impl) { |
| if (ok) |
| *ok = false; |
| return 0.0f; |
| } |
| return m_impl->toFloat(ok); |
| } |
| |
| String String::isolatedCopy() const & |
| { |
| // FIXME: Should this function, and the many others like it, be inlined? |
| return m_impl ? m_impl->isolatedCopy() : String { }; |
| } |
| |
| String String::isolatedCopy() && |
| { |
| if (isSafeToSendToAnotherThread()) { |
| // Since we know that our string is a temporary that will be destroyed |
| // we can just steal the m_impl from it, thus avoiding a copy. |
| return { WTFMove(*this) }; |
| } |
| |
| return m_impl ? m_impl->isolatedCopy() : String { }; |
| } |
| |
| bool String::isSafeToSendToAnotherThread() const |
| { |
| // AtomicStrings are not safe to send between threads as ~StringImpl() |
| // will try to remove them from the wrong AtomicStringTable. |
| return isEmpty() || (m_impl->hasOneRef() && !m_impl->isAtomic()); |
| } |
| |
| void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const |
| { |
| result.clear(); |
| |
| unsigned startPos = 0; |
| size_t endPos; |
| while ((endPos = find(separator, startPos)) != notFound) { |
| if (allowEmptyEntries || startPos != endPos) |
| result.append(substring(startPos, endPos - startPos)); |
| startPos = endPos + separator.length(); |
| } |
| if (allowEmptyEntries || startPos != length()) |
| result.append(substring(startPos)); |
| } |
| |
| void String::split(UChar separator, bool allowEmptyEntries, const SplitFunctor& functor) const |
| { |
| StringView view(*this); |
| |
| unsigned startPos = 0; |
| size_t endPos; |
| while ((endPos = find(separator, startPos)) != notFound) { |
| if (allowEmptyEntries || startPos != endPos) |
| functor(view.substring(startPos, endPos - startPos)); |
| startPos = endPos + 1; |
| } |
| if (allowEmptyEntries || startPos != length()) |
| functor(view.substring(startPos)); |
| } |
| |
| void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const |
| { |
| result.clear(); |
| split(separator, allowEmptyEntries, [&result](StringView item) { |
| result.append(item.toString()); |
| }); |
| } |
| |
| CString String::ascii() const |
| { |
| // Printable ASCII characters 32..127 and the null character are |
| // preserved, characters outside of this range are converted to '?'. |
| |
| unsigned length = this->length(); |
| if (!length) { |
| char* characterBuffer; |
| return CString::newUninitialized(length, characterBuffer); |
| } |
| |
| if (this->is8Bit()) { |
| const LChar* characters = this->characters8(); |
| |
| char* characterBuffer; |
| CString result = CString::newUninitialized(length, characterBuffer); |
| |
| for (unsigned i = 0; i < length; ++i) { |
| LChar ch = characters[i]; |
| characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; |
| } |
| |
| return result; |
| } |
| |
| const UChar* characters = this->characters16(); |
| |
| char* characterBuffer; |
| CString result = CString::newUninitialized(length, characterBuffer); |
| |
| for (unsigned i = 0; i < length; ++i) { |
| UChar ch = characters[i]; |
| characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; |
| } |
| |
| return result; |
| } |
| |
| CString String::latin1() const |
| { |
| // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are |
| // preserved, characters outside of this range are converted to '?'. |
| |
| unsigned length = this->length(); |
| |
| if (!length) |
| return CString("", 0); |
| |
| if (is8Bit()) |
| return CString(reinterpret_cast<const char*>(this->characters8()), length); |
| |
| const UChar* characters = this->characters16(); |
| |
| char* characterBuffer; |
| CString result = CString::newUninitialized(length, characterBuffer); |
| |
| for (unsigned i = 0; i < length; ++i) { |
| UChar ch = characters[i]; |
| characterBuffer[i] = ch > 0xff ? '?' : ch; |
| } |
| |
| return result; |
| } |
| |
| CString String::utf8(ConversionMode mode) const |
| { |
| return m_impl ? m_impl->utf8(mode) : CString { "", 0 }; |
| } |
| |
| CString String::utf8() const |
| { |
| return utf8(LenientConversion); |
| } |
| |
| String String::make8BitFrom16BitSource(const UChar* source, size_t length) |
| { |
| if (!length) |
| return String(); |
| |
| LChar* destination; |
| String result = String::createUninitialized(length, destination); |
| |
| copyLCharsFromUCharSource(destination, source, length); |
| |
| return result; |
| } |
| |
| String String::make16BitFrom8BitSource(const LChar* source, size_t length) |
| { |
| if (!length) |
| return String(); |
| |
| UChar* destination; |
| String result = String::createUninitialized(length, destination); |
| |
| StringImpl::copyCharacters(destination, source, length); |
| |
| return result; |
| } |
| |
| String String::fromUTF8(const LChar* stringStart, size_t length) |
| { |
| if (length > std::numeric_limits<unsigned>::max()) |
| CRASH(); |
| |
| if (!stringStart) |
| return String(); |
| |
| if (!length) |
| return emptyString(); |
| |
| if (charactersAreAllASCII(stringStart, length)) |
| return StringImpl::create(stringStart, length); |
| |
| Vector<UChar, 1024> buffer(length); |
| UChar* bufferStart = buffer.data(); |
| |
| UChar* bufferCurrent = bufferStart; |
| const char* stringCurrent = reinterpret_cast<const char*>(stringStart); |
| if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(stringStart + length), &bufferCurrent, bufferCurrent + buffer.size()) != conversionOK) |
| return String(); |
| |
| unsigned utf16Length = bufferCurrent - bufferStart; |
| ASSERT_WITH_SECURITY_IMPLICATION(utf16Length < length); |
| return StringImpl::create(bufferStart, utf16Length); |
| } |
| |
| String String::fromUTF8(const LChar* string) |
| { |
| if (!string) |
| return String(); |
| return fromUTF8(string, strlen(reinterpret_cast<const char*>(string))); |
| } |
| |
| String String::fromUTF8(const CString& s) |
| { |
| return fromUTF8(s.data()); |
| } |
| |
| String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size) |
| { |
| String utf8 = fromUTF8(string, size); |
| if (!utf8) |
| return String(string, size); |
| return utf8; |
| } |
| |
| // String Operations |
| |
| static bool isCharacterAllowedInBase(UChar c, int base) |
| { |
| if (c > 0x7F) |
| return false; |
| if (isASCIIDigit(c)) |
| return c - '0' < base; |
| if (isASCIIAlpha(c)) { |
| if (base > 36) |
| base = 36; |
| return (c >= 'a' && c < 'a' + base - 10) |
| || (c >= 'A' && c < 'A' + base - 10); |
| } |
| return false; |
| } |
| |
// Strictly parses an integer of type IntegralType from `length` characters at `data`.
//
// Accepted syntax: optional leading whitespace, an optional sign ('+' always,
// '-' only for signed types), one or more digits valid in `base`, optional
// trailing whitespace. Anything else, a null `data`, or a value outside the
// range of IntegralType is a failure.
//
// On success returns the value and sets *ok (when non-null) to true.
// On failure returns 0 and sets *ok (when non-null) to false.
//
// Negative numbers are accumulated downward (value = base * value - digit) and
// checked against numeric_limits::min(). Accumulating the magnitude as a positive
// number and negating it afterwards would overflow for the most negative value,
// and would reject that value altogether in bases where max() % base == base - 1.
template<typename IntegralType, typename CharacterType>
static inline IntegralType toIntegralType(const CharacterType* data, size_t length, bool* ok, int base)
{
    const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
    const IntegralType integralMin = std::numeric_limits<IntegralType>::min();
    const bool isSigned = std::numeric_limits<IntegralType>::is_signed;

    // Limits for the upward accumulation of non-negative numbers.
    const IntegralType maxMultiplier = integralMax / base;
    const IntegralType maxLastDigit = integralMax % base;

    // Limits for the downward accumulation of negative numbers. For unsigned types
    // both are zero and never used, since '-' is not accepted there. The binary
    // "0 - x" form avoids applying unary minus to an unsigned operand.
    const IntegralType minMultiplier = integralMin / base;
    const IntegralType minLastDigit = 0 - (integralMin % base);

    auto fail = [ok]() -> IntegralType {
        if (ok)
            *ok = false;
        return 0;
    };

    if (!data)
        return fail();

    // skip leading whitespace
    while (length && isSpaceOrNewline(*data)) {
        --length;
        ++data;
    }

    bool isNegative = false;
    if (isSigned && length && *data == '-') {
        --length;
        ++data;
        isNegative = true;
    } else if (length && *data == '+') {
        --length;
        ++data;
    }

    // At least one digit is required.
    if (!length || !isCharacterAllowedInBase(*data, base))
        return fail();

    IntegralType value = 0;
    while (length && isCharacterAllowedInBase(*data, base)) {
        --length;
        IntegralType digitValue;
        auto c = *data;
        if (isASCIIDigit(c))
            digitValue = c - '0';
        else if (c >= 'a')
            digitValue = c - 'a' + 10;
        else
            digitValue = c - 'A' + 10;

        if (isNegative) {
            // Reject before the step below could go under integralMin.
            if (value < minMultiplier || (value == minMultiplier && digitValue > minLastDigit))
                return fail();
            value = base * value - digitValue;
        } else {
            // Reject before the step below could go over integralMax.
            if (value > maxMultiplier || (value == maxMultiplier && digitValue > maxLastDigit))
                return fail();
            value = base * value + digitValue;
        }
        ++data;
    }

    // skip trailing space
    while (length && isSpaceOrNewline(*data)) {
        --length;
        ++data;
    }

    // Anything left over means trailing junk.
    if (length)
        return fail();

    if (ok)
        *ok = true;
    return value;
}
| |
// Returns the length of the longest prefix of `data` that looks like an integer:
// optional whitespace, an optional '+' or '-', then ASCII digits.
template<typename CharacterType>
static unsigned lengthOfCharactersAsInteger(const CharacterType* data, size_t length)
{
    size_t index = 0;

    // Allow leading spaces.
    while (index != length && isSpaceOrNewline(data[index]))
        ++index;

    // Allow sign.
    if (index != length) {
        const auto candidate = data[index];
        if (candidate == '+' || candidate == '-')
            ++index;
    }

    // Allow digits.
    while (index != length && isASCIIDigit(data[index]))
        ++index;

    return index;
}
| |
| int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<int, LChar>(data, length, ok, base); |
| } |
| |
| int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<int, UChar>(data, length, ok, base); |
| } |
| |
| unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<unsigned, LChar>(data, length, ok, base); |
| } |
| |
| unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<unsigned, UChar>(data, length, ok, base); |
| } |
| |
| int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<int64_t, LChar>(data, length, ok, base); |
| } |
| |
| int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<int64_t, UChar>(data, length, ok, base); |
| } |
| |
| uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<uint64_t, LChar>(data, length, ok, base); |
| } |
| |
| uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<uint64_t, UChar>(data, length, ok, base); |
| } |
| |
| intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<intptr_t, LChar>(data, length, ok, base); |
| } |
| |
| intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base) |
| { |
| return toIntegralType<intptr_t, UChar>(data, length, ok, base); |
| } |
| |
| int charactersToInt(const LChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| } |
| |
| int charactersToInt(const UChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, length), ok, 10); |
| } |
| |
| unsigned charactersToUInt(const LChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| } |
| |
| unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
| } |
| |
| int64_t charactersToInt64(const LChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| } |
| |
| int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
| } |
| |
| uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| } |
| |
| uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
| } |
| |
| intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
| } |
| |
| intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) |
| { |
| return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
| } |
| |
// Selects how toDoubleType() reports success when characters remain after the
// parsed number: with DisallowTrailingJunk the whole input must be consumed,
// with AllowTrailingJunk any non-empty parse counts as success.
enum TrailingJunkPolicy { DisallowTrailingJunk, AllowTrailingJunk };

// Shared implementation behind charactersToDouble()/charactersToFloat().
//
// Skips leading ASCII whitespace, then lets parseDouble() consume as much of
// the remainder as it can.
//   - If parseDouble() consumed nothing, returns 0.0, sets *ok (when provided)
//     to false and leaves |parsedLength| at zero.
//   - Otherwise |parsedLength| becomes the number of characters consumed
//     counted from |data| (skipped whitespace included), and *ok (when
//     provided) is set according to |policy|.
// |ok| may be null, in which case no success flag is written.
template<typename CharacterType, TrailingJunkPolicy policy>
static inline double toDoubleType(const CharacterType* data, size_t length, bool* ok, size_t& parsedLength)
{
    // Find the first character that is not ASCII whitespace.
    size_t numberStart = 0;
    for (; numberStart < length; ++numberStart) {
        if (!isASCIISpace(data[numberStart]))
            break;
    }

    const double value = parseDouble(data + numberStart, length - numberStart, parsedLength);

    if (parsedLength) {
        // Report the consumed length relative to the start of the buffer.
        parsedLength += numberStart;
        if (ok)
            *ok = policy == AllowTrailingJunk || parsedLength == length;
        return value;
    }

    // Nothing numeric was found.
    if (ok)
        *ok = false;
    return 0.0;
}
| |
| double charactersToDouble(const LChar* data, size_t length, bool* ok) |
| { |
| size_t parsedLength; |
| return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength); |
| } |
| |
| double charactersToDouble(const UChar* data, size_t length, bool* ok) |
| { |
| size_t parsedLength; |
| return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength); |
| } |
| |
| float charactersToFloat(const LChar* data, size_t length, bool* ok) |
| { |
| // FIXME: This will return ok even when the string fits into a double but not a float. |
| size_t parsedLength; |
| return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength)); |
| } |
| |
| float charactersToFloat(const UChar* data, size_t length, bool* ok) |
| { |
| // FIXME: This will return ok even when the string fits into a double but not a float. |
| size_t parsedLength; |
| return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength)); |
| } |
| |
| float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength) |
| { |
| // FIXME: This will return ok even when the string fits into a double but not a float. |
| return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, length, 0, parsedLength)); |
| } |
| |
| float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength) |
| { |
| // FIXME: This will return ok even when the string fits into a double but not a float. |
| return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, length, 0, parsedLength)); |
| } |
| |
| const String& emptyString() |
| { |
| static NeverDestroyed<String> emptyString(StringImpl::empty()); |
| return emptyString; |
| } |
| |
| } // namespace WTF |
| |
| #ifndef NDEBUG |
| |
| // For use in the debugger. |
| String* string(const char*); |
| Vector<char> asciiDebug(StringImpl* impl); |
| Vector<char> asciiDebug(String& string); |
| |
| void String::show() const |
| { |
| dataLogF("%s\n", asciiDebug(impl()).data()); |
| } |
| |
| String* string(const char* s) |
| { |
| // Intentionally leaks memory! |
| return new String(s); |
| } |
| |
| Vector<char> asciiDebug(StringImpl* impl) |
| { |
| if (!impl) |
| return asciiDebug(String(ASCIILiteral("[null]")).impl()); |
| |
| Vector<char> buffer; |
| for (unsigned i = 0; i < impl->length(); ++i) { |
| UChar ch = (*impl)[i]; |
| if (isASCIIPrintable(ch)) { |
| if (ch == '\\') |
| buffer.append(ch); |
| buffer.append(ch); |
| } else { |
| buffer.append('\\'); |
| buffer.append('u'); |
| appendUnsignedAsHexFixedSize(ch, buffer, 4); |
| } |
| } |
| buffer.append('\0'); |
| return buffer; |
| } |
| |
| Vector<char> asciiDebug(String& string) |
| { |
| return asciiDebug(string.impl()); |
| } |
| |
| #endif |