| /* |
| * Copyright (C) 2012-2018 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
// Emits the preamble of the generated C++ file through print(): the license
// comment, the "DO NOT EDIT" marker, the #include lines, and the opening of
// the JSC::Yarr namespaces. print() is not defined in this file; it is
// presumably supplied by the JavaScript shell that runs this script.
function printHeader()
{
    // One array entry per emitted line; joined with "\n" so the whole license
    // comment is handed to print() as a single string.
    var licenseLines = [
        "/*",
        " * Copyright (C) 2012-2018 Apple Inc. All rights reserved.",
        " *",
        " * Redistribution and use in source and binary forms, with or without",
        " * modification, are permitted provided that the following conditions",
        " * are met:",
        " * 1. Redistributions of source code must retain the above copyright",
        " * notice, this list of conditions and the following disclaimer.",
        " * 2. Redistributions in binary form must reproduce the above copyright",
        " * notice, this list of conditions and the following disclaimer in the",
        " * documentation and/or other materials provided with the distribution.",
        " *",
        " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY",
        " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE",
        " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR",
        " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR",
        " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,",
        " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,",
        " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR",
        " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY",
        " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT",
        " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE",
        // The trailing space inside the next literal is part of the emitted text.
        " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ",
        " */"
    ];
    var generatedMarker = "// DO NOT EDIT! - this file autogenerated by YarrCanonicalize.js";
    var namespaceOpen = "namespace JSC { namespace Yarr {";

    print(licenseLines.join("\n"));
    print();
    print(generatedMarker);
    print();
    print('#include "config.h"');
    print('#include "YarrCanonicalize.h"');
    print();
    print(namespaceOpen);
    print();
}
| |
// Emits the end of the generated C++ file: the line closing both namespaces
// opened by printHeader(), then an argument-less print() (presumably a blank
// line; print() is supplied by the host shell, not by this file).
function printFooter()
{
    var namespaceClose = "} } // JSC::Yarr";
    print(namespaceClose);
    print();
}
| |
// Formats a value as a "0x"-prefixed lowercase hexadecimal literal, left-padded
// with zeros to at least four digits. The argument is passed through Number()
// first, so numeric strings are accepted; values needing more than four digits
// are emitted unpadded and untruncated.
function hex(x)
{
    var digits = Number(x).toString(16);
    var padding = "";
    for (var width = digits.length; width < 4; ++width)
        padding += "0";
    return "0x" + padding + digits;
}
| |
// Maps a single UTF-16 code unit to its canonical (case-folded) code unit.
// See ES 6.0, 21.2.2.8.2, step 3.
//
// The result is the code unit of the upper-cased character, except that ch is
// returned unchanged when
//   - upper-casing produces more than one code unit, or
//   - ch is outside ASCII (>= 128) but its upper-case form is inside ASCII.
function canonicalize(ch)
{
    var upper = String.fromCharCode(ch).toUpperCase();
    if (upper.length > 1)
        return ch;

    var upperCode = upper.charCodeAt(0);
    var mapsNonASCIIToASCII = ch >= 128 && upperCode < 128;
    return mapsNonASCIIToASCII ? ch : upperCode;
}
| |
// Largest code unit value visited when building the UCS2 tables.
var MAX_UCS2 = 0xFFFF;

// Pass 1: builds the reverse mapping of canonicalize() over 0..MAX_UCS2
// (inclusive). The result is a sparse array indexed by canonical value; each
// populated slot holds, in ascending order, every character code that
// canonicalizes to that value. Values nothing canonicalizes to stay as holes.
function createUCS2CanonicalGroups()
{
    var groups = [];
    var code = 0;
    while (code <= MAX_UCS2) {
        var canonical = canonicalize(code);
        var members = groups[canonical];
        if (!members) {
            members = [];
            groups[canonical] = members;
        }
        members.push(code);
        ++code;
    }
    return groups;
}
| |
// Emits, via print(), the C++ canonicalization tables for one character domain.
//
//   prefix          - Name prefix for the generated identifiers. It is
//                     lower-cased for array names and upper-cased for the
//                     *_CANONICALIZATION_* size constants.
//   maxValue        - Largest character code covered by the range table.
//   canonicalGroups - Maps each canonical value to the list of character codes
//                     that canonicalize to it (the shape returned by
//                     createUCS2CanonicalGroups()). It is read only: groups are
//                     copied before being sorted.
//
// Every code in [0, maxValue] is expected to belong to some group; a code with
// no group gets no type and the range-table output fails on info.type.split().
//
// All loop variables are declared locally, so the function neither leaks
// globals nor throws a ReferenceError when run as strict-mode or module code.
function createTables(prefix, maxValue, canonicalGroups)
{
    const prefixLower = prefix.toLowerCase();
    const prefixUpper = prefix.toUpperCase();
    const typeInfo = [];
    const characterSetInfo = [];

    // Pass 2: populate typeInfo & characterSetInfo. Every character gets a
    // "Type:value" string: one of the Canonicalize* type names assigned below
    // plus a payload (0, a delta, or an index into characterSetInfo).
    // Object.keys() visits only populated slots, so holes in a sparse array
    // are skipped, and integer keys come back in ascending order.
    for (const cu of Object.keys(canonicalGroups)) {
        // The set of characters that canonicalize to cu.
        const group = canonicalGroups[cu];

        // If there is only one, it is unique.
        if (group.length === 1) {
            typeInfo[group[0]] = "CanonicalizeUnique:0";
            continue;
        }

        // Sort a copy numerically so the caller's array is left untouched.
        const characters = [...group].sort((x, y) => x - y);

        // If there are more than two characters, create an entry in characterSetInfo.
        if (characters.length > 2) {
            for (const character of characters)
                typeInfo[character] = "CanonicalizeSet:" + characterSetInfo.length;
            characterSetInfo.push(characters);
            continue;
        }

        // We have a pair: mark alternating ranges, otherwise track whether this
        // is the low or high partner together with the distance between them.
        const lo = characters[0];
        const hi = characters[1];
        const delta = hi - lo;
        if (delta === 1) {
            // Adjacent pair; "Aligned" means the low partner is even.
            const type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0";
            typeInfo[lo] = type;
            typeInfo[hi] = type;
        } else {
            typeInfo[lo] = "CanonicalizeRangeLo:" + delta;
            typeInfo[hi] = "CanonicalizeRangeHi:" + delta;
        }
    }

    // Pass 3: coalesce runs of consecutive characters with an identical
    // "Type:value" string into inclusive [begin, end] ranges.
    const rangeInfo = [];
    for (let end = 0; end <= maxValue; ++end) {
        const begin = end;
        const type = typeInfo[end];
        while (end < maxValue && typeInfo[end + 1] === type)
            ++end;
        rangeInfo.push({ begin, end, type });
    }

    // One zero-terminated array per character set.
    for (let index = 0; index < characterSetInfo.length; ++index) {
        let characters = "";
        for (const character of characterSetInfo[index])
            characters += hex(character) + ", ";
        print("const UChar32 " + prefixLower + "CharacterSet" + index + "[] = { " + characters + "0 };");
    }
    print();

    // Table of pointers to the character set arrays above.
    print("static constexpr size_t " + prefixUpper + "_CANONICALIZATION_SETS = " + characterSetInfo.length + ";");
    print("const UChar32* const " + prefixLower + "CharacterSetInfo[" + prefixUpper + "_CANONICALIZATION_SETS] = {");
    for (let index = 0; index < characterSetInfo.length; ++index)
        print(" " + prefixLower + "CharacterSet" + index + ",");
    print("};");
    print();

    // Range table: { begin, end, value, type } per coalesced range.
    print("const size_t " + prefixUpper + "_CANONICALIZATION_RANGES = " + rangeInfo.length + ";");
    print("const CanonicalizationRange " + prefixLower + "RangeInfo[" + prefixUpper + "_CANONICALIZATION_RANGES] = {");
    for (const info of rangeInfo) {
        const [typeName, value] = info.type.split(':');
        print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(value) + ", " + typeName + " },");
    }
    print("};");
    print();

    // Create canonical table for LChar domain: 256 entries, 16 per emitted
    // line. The table name is fixed; it is not derived from prefix.
    let line = "const uint16_t canonicalTableLChar[256] = {";
    for (let i = 0; i < 256; i++) {
        // At the start of every row, flush the previous line (the declaration
        // line on the first iteration).
        if (!(i % 16)) {
            print(line);
            line = " ";
        }
        const canonicalChar = canonicalize(i);
        line += (canonicalChar < 16 ? "0x0" : "0x") + canonicalChar.toString(16);
        if ((i % 16) !== 15)
            line += ", ";
        else if (i !== 255)
            line += ",";
    }
    print(line);
    print("};");
    print();
}
| |
// Script entry point: emit the file preamble, the UCS2 canonicalization
// tables, and the closing footer, in that order.
printHeader();

// Group every UCS2 code unit by its canonical value, then emit the tables.
var ucs2CanonicalGroups = createUCS2CanonicalGroups();
createTables("UCS2", MAX_UCS2, ucs2CanonicalGroups);

printFooter();
| |