| /* |
| * Copyright (C) 2004-2020 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "TextCodecUTF16.h" |
| |
| #include <wtf/text/CString.h> |
| #include <wtf/text/StringBuilder.h> |
| #include <wtf/text/WTFString.h> |
| #include <wtf/unicode/CharacterNames.h> |
| |
| namespace WebCore { |
| |
| inline TextCodecUTF16::TextCodecUTF16(bool littleEndian) |
| : m_littleEndian(littleEndian) |
| { |
| } |
| |
| void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar) |
| { |
| registrar("UTF-16LE", "UTF-16LE"); |
| registrar("UTF-16BE", "UTF-16BE"); |
| |
| registrar("ISO-10646-UCS-2", "UTF-16LE"); |
| registrar("UCS-2", "UTF-16LE"); |
| registrar("UTF-16", "UTF-16LE"); |
| registrar("Unicode", "UTF-16LE"); |
| registrar("csUnicode", "UTF-16LE"); |
| registrar("unicodeFEFF", "UTF-16LE"); |
| |
| registrar("unicodeFFFE", "UTF-16BE"); |
| } |
| |
| void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar) |
| { |
| registrar("UTF-16LE", [] { |
| return makeUnique<TextCodecUTF16>(true); |
| }); |
| registrar("UTF-16BE", [] { |
| return makeUnique<TextCodecUTF16>(false); |
| }); |
| } |
| |
| // https://encoding.spec.whatwg.org/#shared-utf-16-decoder |
| String TextCodecUTF16::decode(const char* bytes, size_t length, bool flush, bool, bool& sawError) |
| { |
| const auto* p = reinterpret_cast<const uint8_t*>(bytes); |
| const auto* const end = p + length; |
| const auto* const endMinusOneOrNull = end ? end - 1 : nullptr; |
| |
| StringBuilder result; |
| result.reserveCapacity(length / 2); |
| |
| auto processCodeUnit = [&] (UChar codeUnit) { |
| if (std::exchange(m_shouldStripByteOrderMark, false) && codeUnit == byteOrderMark) |
| return; |
| if (m_leadSurrogate) { |
| auto leadSurrogate = *std::exchange(m_leadSurrogate, std::nullopt); |
| if (U16_IS_TRAIL(codeUnit)) { |
| result.appendCharacter(U16_GET_SUPPLEMENTARY(leadSurrogate, codeUnit)); |
| return; |
| } |
| sawError = true; |
| result.append(replacementCharacter); |
| } |
| if (U16_IS_LEAD(codeUnit)) { |
| m_leadSurrogate = codeUnit; |
| return; |
| } |
| if (U16_IS_TRAIL(codeUnit)) { |
| sawError = true; |
| result.append(replacementCharacter); |
| return; |
| } |
| result.append(codeUnit); |
| }; |
| auto processBytesLE = [&] (uint8_t first, uint8_t second) { |
| processCodeUnit(first | (second << 8)); |
| }; |
| auto processBytesBE = [&] (uint8_t first, uint8_t second) { |
| processCodeUnit((first << 8) | second); |
| }; |
| |
| if (m_leadByte && p < end) { |
| auto leadByte = *std::exchange(m_leadByte, std::nullopt); |
| if (m_littleEndian) |
| processBytesLE(leadByte, p[0]); |
| else |
| processBytesBE(leadByte, p[0]); |
| p++; |
| } |
| |
| if (m_littleEndian) { |
| while (p < endMinusOneOrNull) { |
| processBytesLE(p[0], p[1]); |
| p += 2; |
| } |
| } else { |
| while (p < endMinusOneOrNull) { |
| processBytesBE(p[0], p[1]); |
| p += 2; |
| } |
| } |
| |
| if (p && p == endMinusOneOrNull) { |
| ASSERT(!m_leadByte); |
| m_leadByte = p[0]; |
| } else |
| ASSERT(!p || p == end); |
| |
| if (flush) { |
| m_shouldStripByteOrderMark = false; |
| if (m_leadByte || m_leadSurrogate) { |
| m_leadByte = std::nullopt; |
| m_leadSurrogate = std::nullopt; |
| sawError = true; |
| result.append(replacementCharacter); |
| } |
| } |
| |
| return result.toString(); |
| } |
| |
| Vector<uint8_t> TextCodecUTF16::encode(StringView string, UnencodableHandling) const |
| { |
| Vector<uint8_t> result(WTF::checkedProduct<size_t>(string.length(), 2)); |
| auto* bytes = result.data(); |
| |
| if (m_littleEndian) { |
| for (auto character : string.codeUnits()) { |
| *bytes++ = character; |
| *bytes++ = character >> 8; |
| } |
| } else { |
| for (auto character : string.codeUnits()) { |
| *bytes++ = character >> 8; |
| *bytes++ = character; |
| } |
| } |
| |
| return result; |
| } |
| |
| } // namespace WebCore |