blob: f00a66b082275f50c20fff6af82613b68ae30ae9 [file] [log] [blame]
/*
* Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family)
* Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com)
* Copyright (C) 2016-2020 Apple Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "IntlCollator.h"
#include "IntlObject.h"
#include "JSBoundFunction.h"
#include "JSCInlines.h"
#include "ObjectConstructor.h"
#include <unicode/ucol.h>
namespace JSC {
// Class metadata for IntlCollator: the class name is "Object", the parent entry is the base class's
// ClassInfo, and the method table is the one produced by CREATE_METHOD_TABLE(IntlCollator).
const ClassInfo IntlCollator::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) };
namespace IntlCollatorInternal {
// Position of each Unicode extension key inside relevantExtensionKeys. The locale data
// callbacks in this file switch on these values.
constexpr size_t collationIndex = 0;
constexpr size_t caseFirstIndex = 1;
constexpr size_t numericIndex = 2;
// Unicode extension keys passed to the locale resolver for Intl.Collator, in index order.
constexpr const char* relevantExtensionKeys[] = { "co", "kf", "kn" };
}
// Deleter for the owning UCollator pointer: closes the ICU handle, doing nothing for a null pointer.
void IntlCollator::UCollatorDeleter::operator()(UCollator* collator) const
{
    if (!collator)
        return;
    ucol_close(collator);
}
// Allocates an IntlCollator cell from the VM heap, constructs it in place, and runs finishCreation()
// before returning it.
IntlCollator* IntlCollator::create(VM& vm, Structure* structure)
{
    auto* collator = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure);
    collator->finishCreation(vm);
    return collator;
}
// Builds the Structure for IntlCollator instances with the given prototype, using ObjectType and
// this class's StructureFlags.
Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
{
    TypeInfo typeInfo(ObjectType, StructureFlags);
    return Structure::create(vm, globalObject, prototype, typeInfo, info());
}
// Forwards to the base class constructor. No collator state is set up here; the ICU collator and
// resolved options are filled in by initializeCollator().
IntlCollator::IntlCollator(VM& vm, Structure* structure)
    : Base(vm, structure)
{ }
// Second-phase construction: runs the base class's finishCreation() and then asserts that this
// cell inherits from IntlCollator's ClassInfo.
void IntlCollator::finishCreation(VM& vm)
{
    Base::finishCreation(vm);
    ASSERT(this->inherits(vm, info()));
}
// GC visiting hook: visits the base class's children, then appends the m_boundCompare member to the visitor.
void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor)
{
    auto* collator = jsCast<IntlCollator*>(cell);
    ASSERT_GC_OBJECT_INHERITS(collator, info());

    Base::visitChildren(collator, visitor);
    visitor.append(collator->m_boundCompare);
}
// Locale data callback used when usage is "sort".
// See ECMA-402 2.0, 9.1 "Internal slots of Service Constructors" and 10.2.3 "Internal slots".
// Returns the list of values for the extension key selected by keyIndex (an index into
// IntlCollatorInternal::relevantExtensionKeys).
Vector<String> IntlCollator::sortLocaleData(const String& locale, size_t keyIndex)
{
    // Converts an ICU collation keyword value to its BCP 47 spelling. Values without a
    // special mapping are returned unchanged.
    auto toBCP47 = [] (const char* icuName) -> const char* {
        if (!strcmp(icuName, "dictionary"))
            return "dict";
        if (!strcmp(icuName, "gb2312han"))
            return "gb2312";
        if (!strcmp(icuName, "phonebook"))
            return "phonebk";
        if (!strcmp(icuName, "traditional"))
            return "trad";
        return icuName;
    };

    Vector<String> values;
    switch (keyIndex) {
    case IntlCollatorInternal::collationIndex: {
        // 10.2.3: the first element of the "co" list must be null for every locale.
        values.append({ });

        UErrorCode status = U_ZERO_ERROR;
        UEnumeration* keywords = ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status);
        if (U_FAILURE(status))
            break;

        for (;;) {
            const char* keyword = uenum_next(keywords, nullptr, &status);
            if (!keyword || U_FAILURE(status))
                break;
            // 10.2.3: "standard" and "search" must never appear in the "co" list.
            bool isExcluded = !strcmp(keyword, "standard") || !strcmp(keyword, "search");
            if (!isExcluded)
                values.append(toBCP47(keyword));
        }
        uenum_close(keywords);
        break;
    }
    case IntlCollatorInternal::caseFirstIndex:
        values.reserveInitialCapacity(3);
        values.uncheckedAppend("false"_s);
        values.uncheckedAppend("lower"_s);
        values.uncheckedAppend("upper"_s);
        break;
    case IntlCollatorInternal::numericIndex:
        values.reserveInitialCapacity(2);
        values.uncheckedAppend("false"_s);
        values.uncheckedAppend("true"_s);
        break;
    default:
        ASSERT_NOT_REACHED();
    }
    return values;
}
// Locale data callback used when usage is "search".
// See ECMA-402 2.0, 9.1 "Internal slots of Service Constructors" and 10.2.3 "Internal slots".
// The locale argument is unused: the returned values depend only on keyIndex.
Vector<String> IntlCollator::searchLocaleData(const String&, size_t keyIndex)
{
    Vector<String> values;
    switch (keyIndex) {
    case IntlCollatorInternal::collationIndex:
        // 10.2.3: the first element of the "co" list must be null for every locale.
        // Here that null entry is the only element.
        values.reserveInitialCapacity(1);
        values.append({ });
        return values;
    case IntlCollatorInternal::caseFirstIndex:
        values.reserveInitialCapacity(3);
        values.uncheckedAppend("false"_s);
        values.uncheckedAppend("lower"_s);
        values.uncheckedAppend("upper"_s);
        return values;
    case IntlCollatorInternal::numericIndex:
        values.reserveInitialCapacity(2);
        values.uncheckedAppend("false"_s);
        values.uncheckedAppend("true"_s);
        return values;
    default:
        break;
    }
    ASSERT_NOT_REACHED();
    return values;
}
// https://tc39.github.io/ecma402/#sec-initializecollator
// Initializes this collator from the `locales` and `options` arguments: reads the options,
// resolves the locale, records the resolved settings in the m_* members, and opens and
// configures the underlying ICU collator. On any failure a JS exception is left pending on the
// throw scope and the function returns early.
// NOTE: the options are read in a fixed order (usage, localeMatcher, numeric, caseFirst,
// sensitivity, ignorePunctuation); keep that order when editing, since each read goes through
// the options object.
void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue)
{
VM& vm = globalObject->vm();
auto scope = DECLARE_THROW_SCOPE(vm);
auto requestedLocales = canonicalizeLocaleList(globalObject, locales);
RETURN_IF_EXCEPTION(scope, void());
// An undefined options value is passed through as-is; anything else is converted to an object first.
JSValue options = optionsValue;
if (!optionsValue.isUndefined()) {
options = optionsValue.toObject(globalObject);
RETURN_IF_EXCEPTION(scope, void());
}
// "usage" selects between the sort and search locale data callbacks ("sort" is passed as the trailing fallback argument).
String usageString = intlStringOption(globalObject, options, vm.propertyNames->usage, { "sort", "search" }, "usage must be either \"sort\" or \"search\"", "sort");
RETURN_IF_EXCEPTION(scope, void());
if (usageString == "sort")
m_usage = Usage::Sort;
else if (usageString == "search")
m_usage = Usage::Search;
else
ASSERT_NOT_REACHED();
auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData;
// `opt` collects the option values handed to resolveLocale: "localeMatcher" always, "kn" and "kf" only when supplied.
HashMap<String, String> opt;
String matcher = intlStringOption(globalObject, options, vm.propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit");
RETURN_IF_EXCEPTION(scope, void());
opt.add("localeMatcher"_s, matcher);
TriState numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric);
RETURN_IF_EXCEPTION(scope, void());
if (numeric != TriState::Indeterminate)
opt.add("kn"_s, numeric == TriState::True ? "true"_s : "false"_s);
String caseFirstOption = intlStringOption(globalObject, options, vm.propertyNames->caseFirst, { "upper", "lower", "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"", nullptr);
RETURN_IF_EXCEPTION(scope, void());
if (!caseFirstOption.isNull())
opt.add("kf"_s, caseFirstOption);
auto& availableLocales = intlCollatorAvailableLocales();
auto result = resolveLocale(globalObject, availableLocales, requestedLocales, opt, IntlCollatorInternal::relevantExtensionKeys, WTF_ARRAY_LENGTH(IntlCollatorInternal::relevantExtensionKeys), localeData);
// An empty resolved locale is reported to script as a TypeError.
m_locale = result.get("locale"_s);
if (m_locale.isEmpty()) {
throwTypeError(globalObject, scope, "failed to initialize Collator due to invalid locale"_s);
return;
}
// A null "co" result is recorded as the collation name "default".
const String& collation = result.get("co"_s);
m_collation = collation.isNull() ? "default"_s : collation;
m_numeric = result.get("kn"_s) == "true";
// Any "kf" result other than "lower" or "upper" (including a missing one) is treated as CaseFirst::False.
const String& caseFirstString = result.get("kf"_s);
if (caseFirstString == "lower")
m_caseFirst = CaseFirst::Lower;
else if (caseFirstString == "upper")
m_caseFirst = CaseFirst::Upper;
else
m_caseFirst = CaseFirst::False;
// "sensitivity" is read with a null trailing argument; any value other than "base", "accent" or "case" lands on Variant.
String sensitivityString = intlStringOption(globalObject, options, vm.propertyNames->sensitivity, { "base", "accent", "case", "variant" }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\"", nullptr);
RETURN_IF_EXCEPTION(scope, void());
if (sensitivityString == "base")
m_sensitivity = Sensitivity::Base;
else if (sensitivityString == "accent")
m_sensitivity = Sensitivity::Accent;
else if (sensitivityString == "case")
m_sensitivity = Sensitivity::Case;
else
m_sensitivity = Sensitivity::Variant;
// ignorePunctuation is enabled only for an explicit true; both False and Indeterminate leave it off.
TriState ignorePunctuation = intlBooleanOption(globalObject, options, vm.propertyNames->ignorePunctuation);
RETURN_IF_EXCEPTION(scope, void());
m_ignorePunctuation = (ignorePunctuation == TriState::True);
// Open the ICU collator for the resolved locale; an ICU failure status becomes a TypeError.
UErrorCode status = U_ZERO_ERROR;
m_collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(m_locale.utf8().data(), &status));
if (U_FAILURE(status)) {
throwTypeError(globalObject, scope, "failed to initialize Collator"_s);
return;
}
// Translate the resolved settings into ICU attribute values. The defaults below correspond to
// Sensitivity::Base and CaseFirst::False; the switches override them for the other cases.
UColAttributeValue strength = UCOL_PRIMARY;
UColAttributeValue caseLevel = UCOL_OFF;
UColAttributeValue caseFirst = UCOL_OFF;
switch (m_sensitivity) {
case Sensitivity::Base:
break;
case Sensitivity::Accent:
strength = UCOL_SECONDARY;
break;
case Sensitivity::Case:
// Primary strength is kept; only the case level is switched on.
caseLevel = UCOL_ON;
break;
case Sensitivity::Variant:
strength = UCOL_TERTIARY;
break;
}
switch (m_caseFirst) {
case CaseFirst::False:
break;
case CaseFirst::Lower:
caseFirst = UCOL_LOWER_FIRST;
break;
case CaseFirst::Upper:
caseFirst = UCOL_UPPER_FIRST;
break;
}
// All attribute setters share one status variable, which is only checked by the ASSERT at the end.
ucol_setAttribute(m_collator.get(), UCOL_STRENGTH, strength, &status);
ucol_setAttribute(m_collator.get(), UCOL_CASE_LEVEL, caseLevel, &status);
ucol_setAttribute(m_collator.get(), UCOL_CASE_FIRST, caseFirst, &status);
ucol_setAttribute(m_collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status);
// FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be
// ignored. There is currently no way to ignore only punctuation.
ucol_setAttribute(m_collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status);
// "The method is required to return 0 when comparing Strings that are considered canonically
// equivalent by the Unicode standard."
ucol_setAttribute(m_collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
ASSERT(U_SUCCESS(status));
}
// https://tc39.es/ecma402/#sec-collator-comparestrings
// Compares x and y with the ICU collator and returns the UCollationResult as a JS number.
// Throws a JS Error if ICU reports a failure status.
JSValue IntlCollator::compareStrings(JSGlobalObject* globalObject, StringView x, StringView y) const
{
    ASSERT(m_collator);

    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    UErrorCode status = U_ZERO_ERROR;
    UCollationResult result = UCOL_EQUAL;

    // The 8-bit buffers are handed to the UTF-8 entry point only when both strings are pure ASCII.
    bool bothASCII = x.is8Bit() && y.is8Bit() && x.isAllASCII() && y.isAllASCII();
    if (bothASCII) {
        const char* xBytes = bitwise_cast<const char*>(x.characters8());
        const char* yBytes = bitwise_cast<const char*>(y.characters8());
        result = ucol_strcollUTF8(m_collator.get(), xBytes, x.length(), yBytes, y.length(), &status);
    } else {
        // Returns 16-bit characters for the view, copying 8-bit contents into `storage` when needed.
        auto charactersAsUTF16 = [] (const StringView& view, Vector<UChar>& storage) -> const UChar* {
            if (view.is8Bit()) {
                storage.resize(view.length());
                StringImpl::copyCharacters(storage.data(), view.characters8(), view.length());
                return storage.data();
            }
            return view.characters16();
        };

        Vector<UChar> xStorage;
        Vector<UChar> yStorage;
        const UChar* xUTF16 = charactersAsUTF16(x, xStorage);
        const UChar* yUTF16 = charactersAsUTF16(y, yStorage);
        result = ucol_strcoll(m_collator.get(), xUTF16, x.length(), yUTF16, y.length());
    }

    if (U_SUCCESS(status))
        return jsNumber(result);
    return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s));
}
// Returns the option string for a Usage value ("sort" or "search").
// Any other value hits ASSERT_NOT_REACHED and yields a null literal.
ASCIILiteral IntlCollator::usageString(Usage usage)
{
    if (usage == Usage::Sort)
        return "sort"_s;
    if (usage == Usage::Search)
        return "search"_s;

    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
// Returns the option string for a Sensitivity value ("base", "accent", "case" or "variant").
// Any other value hits ASSERT_NOT_REACHED and yields a null literal.
ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity)
{
    if (sensitivity == Sensitivity::Base)
        return "base"_s;
    if (sensitivity == Sensitivity::Accent)
        return "accent"_s;
    if (sensitivity == Sensitivity::Case)
        return "case"_s;
    if (sensitivity == Sensitivity::Variant)
        return "variant"_s;

    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
// Returns the option string for a CaseFirst value ("false", "lower" or "upper").
// Any other value hits ASSERT_NOT_REACHED and yields a null literal.
ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst)
{
    if (caseFirst == CaseFirst::False)
        return "false"_s;
    if (caseFirst == CaseFirst::Lower)
        return "lower"_s;
    if (caseFirst == CaseFirst::Upper)
        return "upper"_s;

    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
// https://tc39.es/ecma402/#sec-intl.collator.prototype.resolvedoptions
// Builds a fresh object describing this collator's resolved settings. Properties are added in
// the order locale, usage, sensitivity, ignorePunctuation, collation, numeric, caseFirst.
JSObject* IntlCollator::resolvedOptions(JSGlobalObject* globalObject) const
{
    VM& vm = globalObject->vm();
    JSObject* resolved = constructEmptyObject(globalObject);

    // Small helper so each property below reads as a name/value pair.
    auto define = [&] (const Identifier& name, JSValue value) {
        resolved->putDirect(vm, name, value);
    };

    define(vm.propertyNames->locale, jsString(vm, m_locale));
    define(vm.propertyNames->usage, jsNontrivialString(vm, usageString(m_usage)));
    define(vm.propertyNames->sensitivity, jsNontrivialString(vm, sensitivityString(m_sensitivity)));
    define(vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation));
    define(vm.propertyNames->collation, jsString(vm, m_collation));
    define(vm.propertyNames->numeric, jsBoolean(m_numeric));
    define(vm.propertyNames->caseFirst, jsNontrivialString(vm, caseFirstString(m_caseFirst)));

    return resolved;
}
// Stores the given bound function in m_boundCompare, passing this object as the owner of the slot.
void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format)
{
    auto& boundCompareSlot = m_boundCompare;
    boundCompareSlot.set(vm, this, format);
}
} // namespace JSC