blob: a132d95b45186d777050d790334e0849ed55cb02 [file] [log] [blame]
/*
* Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
* Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
* Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "TextCodecGtk.h"
#include <gio/gio.h>
#include <wtf/gobject/GOwnPtr.h>
#include "Logging.h"
#include <wtf/Assertions.h>
#include <wtf/HashMap.h>
#include <wtf/text/CString.h>
#include <wtf/text/WTFString.h>
using std::min;
namespace WebCore {
// TextCodec's appendOmittingBOM() is gone (http://trac.webkit.org/changeset/33380),
// so the conversion result itself must not contain an extra BOM.
// This is achieved by naming the UTF-16 codec with an explicit endianness when opening GLib converters.
// internalEncodingName is the UTF-16 variant selected by the G_BYTE_ORDER check below.
#if (G_BYTE_ORDER == G_BIG_ENDIAN)
static const gchar* internalEncodingName = "UTF-16BE";
#else
static const gchar* internalEncodingName = "UTF-16LE";
#endif
// Size, in bytes, of the on-stack scratch buffer that decode() and encode() hand to each conversion step.
const size_t ConversionBufferSize = 16384;
// Factory callback passed to the codec registrar: builds a TextCodecGtk for
// the requested encoding. The second (unnamed) argument is ignored.
static PassOwnPtr<TextCodec> newTextCodecGtk(const TextEncoding& encoding, const void*)
{
    TextCodecGtk* codec = new TextCodecGtk(encoding);
    return adoptPtr(codec);
}
static bool isEncodingAvailable(const gchar* encodingName)
{
GIConv tester;
// test decoding
tester = g_iconv_open(internalEncodingName, encodingName);
if (tester == reinterpret_cast<GIConv>(-1)) {
return false;
} else {
g_iconv_close(tester);
// test encoding
tester = g_iconv_open(encodingName, internalEncodingName);
if (tester == reinterpret_cast<GIConv>(-1)) {
return false;
} else {
g_iconv_close(tester);
return true;
}
}
}
// Registers canonicalName under its own name when iconv supports it.
// Returns whether the registration happened, so callers can skip the aliases.
static bool registerEncodingNameIfAvailable(EncodingNameRegistrar registrar, const char* canonicalName)
{
    if (!isEncodingAvailable(canonicalName))
        return false;

    registrar(canonicalName, canonicalName);
    return true;
}
// Registers aliasName as another name for canonicalName, but only when iconv
// recognises the alias itself.
static void registerEncodingAliasIfAvailable(EncodingNameRegistrar registrar, const char* canonicalName, const char* aliasName)
{
    if (!isEncodingAvailable(aliasName))
        return;

    registrar(aliasName, canonicalName);
}
// Hooks newTextCodecGtk up as the codec factory for codecName when iconv
// supports that encoding; does nothing otherwise.
static void registerCodecIfAvailable(TextCodecRegistrar registrar, const char* codecName)
{
    if (!isEncodingAvailable(codecName))
        return;

    registrar(codecName, newTextCodecGtk, 0);
}
// Registers the names (and, for Latin-1, the aliases) of the base encodings.
// Names are tried in table order; each is registered only if iconv supports it.
void TextCodecGtk::registerBaseEncodingNames(EncodingNameRegistrar registrar)
{
    // Unicode
    static const char* const unicodeNames[] = {
        "UTF-8",
        "UTF-32",
        "UTF-32BE",
        "UTF-32LE",
    };
    const size_t unicodeNameCount = sizeof(unicodeNames) / sizeof(unicodeNames[0]);
    for (size_t index = 0; index < unicodeNameCount; ++index)
        registerEncodingNameIfAvailable(registrar, unicodeNames[index]);

    // Western
    static const char* const latin1Name = "ISO-8859-1";
    if (!registerEncodingNameIfAvailable(registrar, latin1Name))
        return;

    static const char* const latin1Aliases[] = {
        "CP819",
        "IBM819",
        "ISO-IR-100",
        "ISO8859-1",
        "ISO_8859-1",
        "ISO_8859-1:1987",
        "L1",
        "LATIN1",
        "CSISOLATIN1",
    };
    const size_t latin1AliasCount = sizeof(latin1Aliases) / sizeof(latin1Aliases[0]);
    for (size_t index = 0; index < latin1AliasCount; ++index)
        registerEncodingAliasIfAvailable(registrar, latin1Name, latin1Aliases[index]);
}
// Registers the codec factory for every base encoding that iconv supports.
void TextCodecGtk::registerBaseCodecs(TextCodecRegistrar registrar)
{
    static const char* const baseCodecNames[] = {
        // Unicode
        "UTF-8",
        "UTF-32",
        "UTF-32BE",
        "UTF-32LE",
        // Western
        "ISO-8859-1",
    };
    const size_t baseCodecCount = sizeof(baseCodecNames) / sizeof(baseCodecNames[0]);
    for (size_t index = 0; index < baseCodecCount; ++index)
        registerCodecIfAvailable(registrar, baseCodecNames[index]);
}
// Registers the canonical names and aliases of the extended (non-base) encodings.
// Every name is registered only when iconv can convert to and from it, and the
// aliases of an encoding are only attempted once its canonical name was accepted.
void TextCodecGtk::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
{
    // Western
    if (registerEncodingNameIfAvailable(registrar, "MACROMAN")) {
        registerEncodingAliasIfAvailable(registrar, "MACROMAN", "MAC");
        registerEncodingAliasIfAvailable(registrar, "MACROMAN", "MACINTOSH");
        registerEncodingAliasIfAvailable(registrar, "MACROMAN", "CSMACINTOSH");
    }

    // Japanese
    if (registerEncodingNameIfAvailable(registrar, "Shift_JIS")) {
        registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "MS_KANJI");
        registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "SHIFT-JIS");
        registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "SJIS");
        registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "CSSHIFTJIS");
    }
    if (registerEncodingNameIfAvailable(registrar, "EUC-JP")) {
        registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EUC_JP");
        registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EUCJP");
        registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE");
        registerEncodingAliasIfAvailable(registrar, "EUC-JP", "CSEUCPKDFMTJAPANESE");
    }
    registerEncodingNameIfAvailable(registrar, "ISO-2022-JP");

    // Traditional Chinese
    if (registerEncodingNameIfAvailable(registrar, "BIG5")) {
        registerEncodingAliasIfAvailable(registrar, "BIG5", "BIG-5");
        registerEncodingAliasIfAvailable(registrar, "BIG5", "BIG-FIVE");
        registerEncodingAliasIfAvailable(registrar, "BIG5", "BIGFIVE");
        registerEncodingAliasIfAvailable(registrar, "BIG5", "CN-BIG5");
        registerEncodingAliasIfAvailable(registrar, "BIG5", "CSBIG5");
    }
    if (registerEncodingNameIfAvailable(registrar, "BIG5-HKSCS")) {
        registerEncodingAliasIfAvailable(registrar, "BIG5-HKSCS", "BIG5-HKSCS:2004");
        registerEncodingAliasIfAvailable(registrar, "BIG5-HKSCS", "BIG5HKSCS");
    }
    registerEncodingNameIfAvailable(registrar, "CP950");

    // Korean
    if (registerEncodingNameIfAvailable(registrar, "ISO-2022-KR"))
        registerEncodingAliasIfAvailable(registrar, "ISO-2022-KR", "CSISO2022KR");
    if (registerEncodingNameIfAvailable(registrar, "CP949"))
        registerEncodingAliasIfAvailable(registrar, "CP949", "UHC");
    if (registerEncodingNameIfAvailable(registrar, "EUC-KR"))
        registerEncodingAliasIfAvailable(registrar, "EUC-KR", "CSEUCKR");

    // Arabic
    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-6")) {
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ARABIC");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ASMO-708");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ECMA-114");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO-IR-127");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO8859-6");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO_8859-6");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO_8859-6:1987");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "CSISOLATINARABIC");
    }
    // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix /fast/encoding/ahram-org-eg.html test case
    if (registerEncodingNameIfAvailable(registrar, "windows-1256")) {
        registerEncodingAliasIfAvailable(registrar, "windows-1256", "CP1256");
        registerEncodingAliasIfAvailable(registrar, "windows-1256", "MS-ARAB");
    }

    // Hebrew
    // (The canonical name is not re-registered as an alias of itself; that call was redundant.)
    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-8")) {
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "HEBREW");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO-IR-138");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO8859-8");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO_8859-8");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO_8859-8:1988");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "CSISOLATINHEBREW");
    }
    // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html
    if (registerEncodingNameIfAvailable(registrar, "windows-1255")) {
        registerEncodingAliasIfAvailable(registrar, "windows-1255", "CP1255");
        registerEncodingAliasIfAvailable(registrar, "windows-1255", "MS-HEBR");
    }

    // Greek
    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-7")) {
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ECMA-118");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ELOT_928");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "GREEK");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "GREEK8");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO-IR-126");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO8859-7");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7:1987");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7:2003");
        // Was "CSI", which is not a charset name and so was never registered;
        // the alias that parallels CSISOLATINARABIC/HEBREW/CYRILLIC is CSISOLATINGREEK.
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "CSISOLATINGREEK");
    }
    if (registerEncodingNameIfAvailable(registrar, "CP869")) {
        registerEncodingAliasIfAvailable(registrar, "CP869", "869");
        registerEncodingAliasIfAvailable(registrar, "CP869", "CP-GR");
        registerEncodingAliasIfAvailable(registrar, "CP869", "IBM869");
        registerEncodingAliasIfAvailable(registrar, "CP869", "CSIBM869");
    }
    registerEncodingNameIfAvailable(registrar, "WINDOWS-1253");

    // Cyrillic
    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-5")) {
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "CYRILLIC");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO-IR-144");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO8859-5");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO_8859-5");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO_8859-5:1988");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "CSISOLATINCYRILLIC");
    }
    if (registerEncodingNameIfAvailable(registrar, "KOI8-R"))
        registerEncodingAliasIfAvailable(registrar, "KOI8-R", "CSKOI8R");
    if (registerEncodingNameIfAvailable(registrar, "CP866")) {
        registerEncodingAliasIfAvailable(registrar, "CP866", "866");
        registerEncodingAliasIfAvailable(registrar, "CP866", "IBM866");
        registerEncodingAliasIfAvailable(registrar, "CP866", "CSIBM866");
    }
    registerEncodingNameIfAvailable(registrar, "KOI8-U");
    // CP1251 added to pass /fast/encoding/charset-cp1251.html
    if (registerEncodingNameIfAvailable(registrar, "windows-1251"))
        registerEncodingAliasIfAvailable(registrar, "windows-1251", "CP1251");
    if (registerEncodingNameIfAvailable(registrar, "mac-cyrillic")) {
        registerEncodingAliasIfAvailable(registrar, "mac-cyrillic", "MACCYRILLIC");
        registerEncodingAliasIfAvailable(registrar, "mac-cyrillic", "x-mac-cyrillic");
    }

    // Thai
    if (registerEncodingNameIfAvailable(registrar, "CP874"))
        registerEncodingAliasIfAvailable(registrar, "CP874", "WINDOWS-874");
    registerEncodingNameIfAvailable(registrar, "TIS-620");

    // Simplified Chinese
    registerEncodingNameIfAvailable(registrar, "GBK");
    if (registerEncodingNameIfAvailable(registrar, "HZ"))
        registerEncodingAliasIfAvailable(registrar, "HZ", "HZ-GB-2312");
    registerEncodingNameIfAvailable(registrar, "GB18030");
    if (registerEncodingNameIfAvailable(registrar, "EUC-CN")) {
        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "EUCCN");
        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "GB2312");
        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "CN-GB");
        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "CSGB2312");
        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "EUC_CN");
    }
    if (registerEncodingNameIfAvailable(registrar, "GB_2312-80")) {
        registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "CHINESE");
        registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "csISO58GB231280");
        registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "GB2312.1980-0");
        registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "ISO-IR-58");
    }

    // Central European
    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-2")) {
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO-IR-101");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO8859-2");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO_8859-2");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO_8859-2:1987");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "L2");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "LATIN2");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "CSISOLATIN2");
    }
    if (registerEncodingNameIfAvailable(registrar, "CP1250")) {
        registerEncodingAliasIfAvailable(registrar, "CP1250", "MS-EE");
        registerEncodingAliasIfAvailable(registrar, "CP1250", "WINDOWS-1250");
    }
    registerEncodingNameIfAvailable(registrar, "MAC-CENTRALEUROPE");

    // Vietnamese
    if (registerEncodingNameIfAvailable(registrar, "CP1258"))
        registerEncodingAliasIfAvailable(registrar, "CP1258", "WINDOWS-1258");

    // Turkish
    if (registerEncodingNameIfAvailable(registrar, "CP1254")) {
        registerEncodingAliasIfAvailable(registrar, "CP1254", "MS-TURK");
        registerEncodingAliasIfAvailable(registrar, "CP1254", "WINDOWS-1254");
    }
    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-9")) {
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO-IR-148");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO8859-9");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO_8859-9");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO_8859-9:1989");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "L5");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "LATIN5");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "CSISOLATIN5");
    }

    // Baltic
    if (registerEncodingNameIfAvailable(registrar, "CP1257")) {
        registerEncodingAliasIfAvailable(registrar, "CP1257", "WINBALTRIM");
        registerEncodingAliasIfAvailable(registrar, "CP1257", "WINDOWS-1257");
    }
    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-4")) {
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO-IR-110");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO8859-4");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO_8859-4");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO_8859-4:1988");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "L4");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "LATIN4");
        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "CSISOLATIN4");
    }
}
// Registers the codec factory for every extended encoding that iconv supports.
// The table keeps the registration order and the per-script grouping.
void TextCodecGtk::registerExtendedCodecs(TextCodecRegistrar registrar)
{
    static const char* const extendedCodecNames[] = {
        // Western
        "MACROMAN",
        // Japanese
        "Shift_JIS",
        "EUC-JP",
        "ISO-2022-JP",
        // Traditional Chinese
        "BIG5",
        "BIG5-HKSCS",
        "CP950",
        // Korean
        "ISO-2022-KR",
        "CP949",
        "EUC-KR",
        // Arabic
        "ISO-8859-6",
        // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix /fast/encoding/ahram-org-eg.html test case
        "windows-1256",
        // Hebrew
        "ISO-8859-8",
        // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html
        "windows-1255",
        // Greek
        "ISO-8859-7",
        "CP869",
        "WINDOWS-1253",
        // Cyrillic
        "ISO-8859-5",
        "KOI8-R",
        "CP866",
        "KOI8-U",
        // CP1251 added to pass /fast/encoding/charset-cp1251.html
        "windows-1251",
        "mac-cyrillic",
        // Thai
        "CP874",
        "TIS-620",
        // Simplified Chinese
        "GBK",
        "HZ",
        "GB18030",
        "EUC-CN",
        "GB_2312-80",
        // Central European
        "ISO-8859-2",
        "CP1250",
        "MAC-CENTRALEUROPE",
        // Vietnamese
        "CP1258",
        // Turkish
        "CP1254",
        "ISO-8859-9",
        // Baltic
        "CP1257",
        "ISO-8859-4",
    };
    const size_t extendedCodecCount = sizeof(extendedCodecNames) / sizeof(extendedCodecNames[0]);
    for (size_t index = 0; index < extendedCodecCount; ++index)
        registerCodecIfAvailable(registrar, extendedCodecNames[index]);
}
// Stores the encoding this codec converts to and from, and starts with no
// partial-input bytes buffered. The converters are not created here: decode()
// and encode() create them on first use.
TextCodecGtk::TextCodecGtk(const TextEncoding& encoding)
: m_encoding(encoding)
, m_numBufferedBytes(0)
{
}
// Intentionally empty: no explicit teardown is performed here.
// NOTE(review): presumably the converter members release themselves (they are
// assigned from adoptGRef) — confirm against the member declarations in the header.
TextCodecGtk::~TextCodecGtk()
{
}
void TextCodecGtk::createIConvDecoder() const
{
ASSERT(!m_iconvDecoder);
m_iconvDecoder = adoptGRef(g_charset_converter_new(internalEncodingName, m_encoding.name(), 0));
}
void TextCodecGtk::createIConvEncoder() const
{
ASSERT(!m_iconvEncoder);
m_iconvEncoder = adoptGRef(g_charset_converter_new(m_encoding.name(), internalEncodingName, 0));
}
// Decodes |length| bytes of this codec's encoding into a UTF-16 String.
//
// bytes/length: the input chunk. Bytes left over from a previous call (an
//     incomplete multi-byte sequence) are prepended before converting.
// flush:        adds G_CONVERTER_FLUSH to the conversion flags.
// stopOnError:  when true, invalid input sets sawError and stops the conversion,
//     returning what was decoded so far; when false, the offending byte is
//     skipped and decoding continues.
// sawError:     set to true on invalid data (only if stopOnError) and on any
//     unexpected converter error.
//
// Returns a null String if the converter cannot be created or an unexpected
// converter error occurs.
String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
{
    // Get a converter for the passed-in encoding.
    if (!m_iconvDecoder)
        createIConvDecoder();
    if (!m_iconvDecoder) {
        LOG_ERROR("Error creating IConv decoder even though encoding was in table.");
        return String();
    }

    Vector<UChar> result;
    gsize bytesRead = 0;
    gsize bytesWritten = 0;
    const gchar* input = bytes;
    gsize inputLength = length;
    gchar buffer[ConversionBufferSize];

    int flags = !length ? G_CONVERTER_INPUT_AT_END : G_CONVERTER_NO_FLAGS;
    if (flush)
        flags |= G_CONVERTER_FLUSH;

    // If an earlier call ended in the middle of a character, convert
    // "leftover bytes + new bytes" from a temporary contiguous copy.
    char* prefixedBytes = 0;
    if (m_numBufferedBytes) {
        inputLength = length + m_numBufferedBytes;
        prefixedBytes = static_cast<char*>(fastMalloc(inputLength));
        memcpy(prefixedBytes, m_bufferedBytes, m_numBufferedBytes);
        memcpy(prefixedBytes + m_numBufferedBytes, bytes, length);
        input = prefixedBytes;
        // all buffered bytes are consumed now
        m_numBufferedBytes = 0;
    }

    bool bufferWasFull;
    do {
        // Must be re-evaluated on every pass: a stale "true" from an earlier
        // pass would keep the loop running after all input has been consumed.
        bufferWasFull = false;

        GOwnPtr<GError> error;
        GConverterResult res = g_converter_convert(G_CONVERTER(m_iconvDecoder.get()),
                                                   input, inputLength,
                                                   buffer, sizeof(buffer),
                                                   static_cast<GConverterFlags>(flags),
                                                   &bytesRead, &bytesWritten,
                                                   &error.outPtr());
        input += bytesRead;
        inputLength -= bytesRead;

        if (res == G_CONVERTER_ERROR) {
            if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT)) {
                // There is not enough input to fully determine what the conversion should produce,
                // save it to a buffer to prepend it to the next input.
                // NOTE(review): assumes the remaining bytes fit in m_bufferedBytes — confirm its size in the header.
                memcpy(m_bufferedBytes, input, inputLength);
                m_numBufferedBytes = inputLength;
                inputLength = 0;
            } else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_NO_SPACE))
                bufferWasFull = true; // Output was drained below; go around again.
            else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
                if (stopOnError)
                    sawError = true;
                if (inputLength) {
                    // Ignore invalid character.
                    input += 1;
                    inputLength -= 1;
                }
            } else {
                sawError = true;
                LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
                m_numBufferedBytes = 0; // Reset state for subsequent calls to decode.
                fastFree(prefixedBytes);
                return String();
            }
        }

        // The converter was opened with a UTF-16 target, so the output is whole UChars.
        result.append(reinterpret_cast<UChar*>(buffer), bytesWritten / sizeof(UChar));
    } while ((inputLength || bufferWasFull) && !sawError);

    fastFree(prefixedBytes);
    return String::adopt(result);
}
// Encodes |length| UTF-16 code units into this codec's encoding.
//
// characters/length: the UTF-16 input.
// handling:          how characters the target encoding cannot represent are
//     replaced (forwarded to TextCodec::getUnencodableReplacement).
//
// Returns "" for empty input, and a null CString if the converter cannot be
// created or the conversion fails with anything other than an unencodable
// character.
CString TextCodecGtk::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    if (!length)
        return "";

    if (!m_iconvEncoder)
        createIConvEncoder();
    if (!m_iconvEncoder) {
        LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
        return CString();
    }

    gsize bytesRead = 0;
    gsize bytesWritten = 0;
    const gchar* input = reinterpret_cast<const char*>(characters);
    gsize inputLength = length * sizeof(UChar);
    gchar buffer[ConversionBufferSize];
    Vector<char> result;
    GOwnPtr<GError> error;

    do {
        g_converter_convert(G_CONVERTER(m_iconvEncoder.get()),
                            input, inputLength,
                            buffer, sizeof(buffer),
                            G_CONVERTER_INPUT_AT_END,
                            &bytesRead, &bytesWritten,
                            &error.outPtr());
        input += bytesRead;
        inputLength -= bytesRead;

        if (bytesWritten > 0)
            result.append(buffer, bytesWritten);

        // Only treat the error as "unencodable character" while at least one
        // whole code unit is left; otherwise the error stays set and is
        // reported after the loop.
        if (error && inputLength >= sizeof(UChar) && g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
            const UChar* codeUnits = reinterpret_cast<const UChar*>(input);
            unsigned codePoint = codeUnits[0];
            gsize consumedBytes = sizeof(UChar);

            // A lead surrogate followed by a trail surrogate is a single
            // supplementary character: replace it once, using its real code
            // point, instead of once per surrogate half.
            if ((codePoint & 0xFC00) == 0xD800 && inputLength >= 2 * sizeof(UChar)) {
                const unsigned trail = codeUnits[1];
                if ((trail & 0xFC00) == 0xDC00) {
                    codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (trail - 0xDC00);
                    consumedBytes = 2 * sizeof(UChar);
                }
            }

            UnencodableReplacementArray replacement;
            int replacementLength = TextCodec::getUnencodableReplacement(codePoint, handling, replacement);

            // Consume the invalid character.
            input += consumedBytes;
            inputLength -= consumedBytes;

            // Append replacement string to result buffer.
            result.append(replacement, replacementLength);

            // The error has been handled; clear it so the loop can continue
            // and the error slot can be reused by the next conversion step.
            error.clear();
        }
    } while (inputLength && !error.get());

    if (error) {
        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
        return CString();
    }

    return CString(result.data(), result.size());
}
} // namespace WebCore