| /* |
| * Copyright (C) 2006, 2007 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "TextEncodingRegistry.h" |
| |
| #include "PlatformString.h" |
| #include "TextCodecLatin1.h" |
| #include "TextCodecUTF16.h" |
| #include <ctype.h> |
| #include <wtf/Assertions.h> |
| #include <wtf/HashMap.h> |
| |
| #if USE(ICU_UNICODE) |
| #include "TextCodecICU.h" |
| #endif |
| #if PLATFORM(MAC) |
| #include "TextCodecMac.h" |
| #endif |
| #if PLATFORM(QT) |
| #include "qt/TextCodecQt.h" |
| #endif |
| |
| namespace WebCore { |
| |
// Longest encoding name, not counting the terminating null, that the registry
// handles: it sizes the conversion buffer used for UChar lookups and is the
// limit asserted for every registered alias.
const size_t maxEncodingNameLength = 63;
| |
// Hash traits for encoding names stored as C strings. Hashing and equality
// both fold ASCII case and skip every character that is not an ASCII letter
// or digit, so "UTF-8", "utf8" and "Utf_8" all denote the same key.
//
// Classification and case folding are done by hand instead of with
// isalnum()/tolower(): those functions depend on the current locale and have
// undefined behavior when handed a negative char, which is what a non-ASCII
// byte becomes on platforms where char is signed. Non-ASCII bytes are simply
// skipped here.
struct TextEncodingNameHash {

    // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
    // or anything like that.
    static const unsigned PHI = 0x9e3779b9U;

    // True only for the ASCII characters 0-9, A-Z and a-z.
    static bool isNameCharacter(char c)
    {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    // Maps ASCII A-Z to a-z and leaves every other value untouched.
    static char foldCase(char c)
    {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
    }

    // Returns true when s1 and s2 contain the same sequence of ASCII
    // alphanumeric characters, compared without regard to case. Both
    // arguments must be null-terminated.
    static bool equal(const char* s1, const char* s2)
    {
        char c1;
        char c2;
        do {
            // Advance each string to its next significant character; the
            // terminating null stops the scan and is compared like any other.
            do
                c1 = *s1++;
            while (c1 && !isNameCharacter(c1));
            do
                c2 = *s2++;
            while (c2 && !isNameCharacter(c2));
            if (foldCase(c1) != foldCase(c2))
                return false;
        } while (c1 && c2);
        return !c1 && !c2;
    }

    // This algorithm is the one-at-a-time hash from:
    // http://burtleburtle.net/bob/hash/hashfaq.html
    // http://burtleburtle.net/bob/hash/doobs.html
    // Only the characters that equal() compares are mixed in, so strings that
    // compare equal always hash to the same value.
    static unsigned hash(const char* s)
    {
        unsigned h = PHI;
        for (;;) {
            char c;
            do {
                c = *s++;
                if (!c) {
                    // End of string: apply the final avalanche steps.
                    h += (h << 3);
                    h ^= (h >> 11);
                    h += (h << 15);
                    return h;
                }
            } while (!isNameCharacter(c));
            h += static_cast<unsigned char>(foldCase(c));
            h += (h << 10);
            h ^= (h >> 6);
        }
    }

};
| |
| struct TextCodecFactory { |
| NewTextCodecFunction function; |
| const void* additionalData; |
| TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { } |
| }; |
| |
| typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap; |
| typedef HashMap<const char*, TextCodecFactory> TextCodecMap; |
| |
| static TextEncodingNameMap* textEncodingNameMap; |
| static TextCodecMap* textCodecMap; |
| static bool didExtendTextCodecMaps; |
| |
| #if ERROR_DISABLED |
| |
| static inline void checkExistingName(const char*, const char*) { } |
| |
| #else |
| |
| static void checkExistingName(const char* alias, const char* atomicName) |
| { |
| const char* oldAtomicName = textEncodingNameMap->get(alias); |
| if (!oldAtomicName) |
| return; |
| if (oldAtomicName == atomicName) |
| return; |
| // Keep the warning silent about one case where we know this will happen. |
| if (strcmp(alias, "ISO-8859-8-I") == 0 |
| && strcmp(oldAtomicName, "ISO-8859-8-I") == 0 |
| && strcmp(atomicName, "ISO_8859-8:1988") == 0) |
| return; |
| LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", |
| alias, oldAtomicName, atomicName); |
| } |
| |
| #endif |
| |
| static void addToTextEncodingNameMap(const char* alias, const char* name) |
| { |
| ASSERT(strlen(alias) <= maxEncodingNameLength); |
| const char* atomicName = textEncodingNameMap->get(name); |
| ASSERT(strcmp(alias, name) == 0 || atomicName); |
| if (!atomicName) |
| atomicName = name; |
| checkExistingName(alias, atomicName); |
| textEncodingNameMap->add(alias, atomicName); |
| } |
| |
| static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData) |
| { |
| TextEncoding encoding(name); |
| ASSERT(encoding.isValid()); |
| textCodecMap->add(encoding.name(), TextCodecFactory(function, additionalData)); |
| } |
| |
| static void buildBaseTextCodecMaps() |
| { |
| textCodecMap = new TextCodecMap; |
| textEncodingNameMap = new TextEncodingNameMap; |
| |
| TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap); |
| TextCodecLatin1::registerCodecs(addToTextCodecMap); |
| |
| TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap); |
| TextCodecUTF16::registerCodecs(addToTextCodecMap); |
| |
| #if USE(ICU_UNICODE) |
| TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap); |
| TextCodecICU::registerBaseCodecs(addToTextCodecMap); |
| #endif |
| } |
| |
| static void extendTextCodecMaps() |
| { |
| #if USE(ICU_UNICODE) |
| TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap); |
| TextCodecICU::registerExtendedCodecs(addToTextCodecMap); |
| #endif |
| |
| #if USE(QT4_UNICODE) |
| TextCodecQt::registerEncodingNames(addToTextEncodingNameMap); |
| TextCodecQt::registerCodecs(addToTextCodecMap); |
| #endif |
| |
| #if PLATFORM(MAC) |
| TextCodecMac::registerEncodingNames(addToTextEncodingNameMap); |
| TextCodecMac::registerCodecs(addToTextCodecMap); |
| #endif |
| } |
| |
| std::auto_ptr<TextCodec> newTextCodec(const TextEncoding& encoding) |
| { |
| ASSERT(textCodecMap); |
| TextCodecFactory factory = textCodecMap->get(encoding.name()); |
| ASSERT(factory.function); |
| return factory.function(encoding, factory.additionalData); |
| } |
| |
| const char* atomicCanonicalTextEncodingName(const char* name) |
| { |
| if (!name || !name[0]) |
| return 0; |
| if (!textEncodingNameMap) |
| buildBaseTextCodecMaps(); |
| if (const char* atomicName = textEncodingNameMap->get(name)) |
| return atomicName; |
| if (didExtendTextCodecMaps) |
| return 0; |
| extendTextCodecMaps(); |
| didExtendTextCodecMaps = true; |
| return textEncodingNameMap->get(name); |
| } |
| |
| const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length) |
| { |
| char buffer[maxEncodingNameLength + 1]; |
| size_t j = 0; |
| for (size_t i = 0; i < length; ++i) { |
| UChar c = characters[i]; |
| if (isalnum(c)) { |
| if (j == maxEncodingNameLength) |
| return 0; |
| buffer[j++] = c; |
| } |
| } |
| buffer[j] = 0; |
| return atomicCanonicalTextEncodingName(buffer); |
| } |
| |
| bool noExtendedTextEncodingNameUsed() |
| { |
| return !didExtendTextCodecMaps; |
| } |
| |
| } // namespace WebCore |