blob: 38cf23af06dc2d6a205e64e4db75ac3f26428de9 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright (C) 2010, 2013-2017 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import sys
# Character class definitions consumed by the generator loop below.
#
# Each entry maps a class name to a dict with:
#   "UseTable": whether the class may be backed by a generated lookup table,
#   "Inverse":  (optional) name of the class that this one is the complement of,
#   "data":     the members, given as single characters, integer code points,
#               or inclusive (first, last) ranges of either.
#
# Every class that names an "Inverse" must cover exactly the code points in
# 0..0x10ffff that its inverse does not.
types = {
    "wordchar": {
        "UseTable": True,
        "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z')],
    },
    "wordUnicodeIgnoreCaseChar": {
        "UseTable": False,
        "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z'), 0x017f, 0x212a],
    },
    "nonwordchar": {
        "UseTable": True,
        "Inverse": "wordchar",
        "data": [
            '`',
            (0, ord('0') - 1),
            (ord('9') + 1, ord('A') - 1),
            (ord('Z') + 1, ord('_') - 1),
            (ord('z') + 1, 0x10ffff),
        ],
    },
    "nonwordUnicodeIgnoreCaseChar": {
        "UseTable": False,
        "Inverse": "wordUnicodeIgnoreCaseChar",
        "data": [
            '`',
            (0, ord('0') - 1),
            (ord('9') + 1, ord('A') - 1),
            (ord('Z') + 1, ord('_') - 1),
            (ord('z') + 1, 0x017e),
            (0x0180, 0x2129),
            (0x212b, 0x10ffff),
        ],
    },
    "newline": {
        "UseTable": False,
        "data": ['\n', '\r', 0x2028, 0x2029],
    },
    "spaces": {
        "UseTable": True,
        "data": [
            ' ', ('\t', '\r'), 0xa0, 0x1680, 0x2028, 0x2029, 0x202f, 0x205f,
            0x3000, (0x2000, 0x200a), 0xfeff,
        ],
    },
    "nonspaces": {
        "UseTable": True,
        "Inverse": "spaces",
        "data": [
            (0, ord('\t') - 1),
            (ord('\r') + 1, ord(' ') - 1),
            (ord(' ') + 1, 0x009f),
            (0x00a1, 0x167f),
            # U+180E is not a member of "spaces", so it belongs here; the
            # range must not be split around it.
            (0x1681, 0x1fff),
            (0x200b, 0x2027),
            (0x202a, 0x202e),
            (0x2030, 0x205e),
            (0x2060, 0x2fff),
            (0x3001, 0xfefe),
            (0xff00, 0x10ffff),
        ],
    },
    "digits": {
        "UseTable": False,
        "data": [('0', '9')],
    },
    "nondigits": {
        "UseTable": False,
        "Inverse": "digits",
        "data": [(0, ord('0') - 1), (ord('9') + 1, 0x10ffff)],
    },
}
# Lookup tables are generated unless the first command-line argument is
# exactly "--no-tables" (an absent argument also means "generate them").
emitTables = sys.argv[1:2] != ["--no-tables"]

# How many table entries are written before a line break is inserted.
entriesPerLine = 50

# Accumulated generated source: lookup tables first, then factory functions.
functions = ""
arrays = ""
_TABLE_SIZE = 65536  # number of entries in each generated lookup table
_ASCII_MAX = 0x7f  # ranges that cross this boundary are split in two
_WIDTH_NAMES = ("Unknown", "HasBMPChars", "HasNonBMPChars", "HasBothBMPAndNonBMP")


def _code_point(value):
    """Return value as an integer code point (one-character str or int)."""
    return ord(value) if isinstance(value, str) else value


def _normalize_ranges(data):
    """Turn a class's "data" list into sorted inclusive (first, last) int pairs.

    Single characters and code points become one-element ranges. A range that
    starts at or below 0x7f and ends above it is split into a part ending at
    0x7f and a part starting at 0x80.
    """
    pairs = []
    for item in data:
        if isinstance(item, (str, int)):
            point = _code_point(item)
            pairs.append((point, point))
            continue
        first, last = (_code_point(bound) for bound in item)
        if first <= _ASCII_MAX < last:
            pairs.append((first, _ASCII_MAX))
            first = _ASCII_MAX + 1
        pairs.append((first, last))
    pairs.sort()
    return pairs


def _build_table(name, ranges, entries_per_line):
    """Return the source text of the 65536-entry 0/1 lookup table for a class.

    Entry k is 1 when code point k lies inside any of the ranges. Entries are
    comma-separated with a line break after every entries_per_line entries.
    """
    flags = [0] * _TABLE_SIZE
    for first, last in ranges:
        # The table only has room for code points up to 0xffff; anything
        # beyond that is clamped off so the initializer never overruns it.
        for point in range(first, min(last, _TABLE_SIZE - 1) + 1):
            flags[point] = 1

    pieces = ["static const char _%sData[65536] = {\n" % name]
    for count, flag in enumerate(flags, start=1):
        if count == _TABLE_SIZE:
            # Last entry: no trailing comma.
            pieces.append("%d" % flag)
            break
        pieces.append("%d," % flag)
        if count % entries_per_line == 0:
            pieces.append("\n")
    pieces.append("\n};\n\n")
    return "".join(pieces)


def _build_create_function(name, classes, ranges, use_table):
    """Return the source text of the <name>Create() factory function.

    When use_table is true the class is constructed from a lookup table: its
    own, or the table of its "Inverse" class with the inverted flag set.
    Every range is then appended as a single match or a range, in the ASCII
    or the Unicode list depending on whether it lies above 127.
    """
    lines = ["std::unique_ptr<CharacterClass> %sCreate()\n" % name, "{\n"]
    if not use_table:
        lines.append(" auto characterClass = makeUnique<CharacterClass>();\n")
    elif "Inverse" in classes:
        lines.append(" auto characterClass = makeUnique<CharacterClass>(_%sData, true);\n" % classes["Inverse"])
    else:
        lines.append(" auto characterClass = makeUnique<CharacterClass>(_%sData, false);\n" % name)

    has_bmp = False
    has_non_bmp = False
    for first, last in ranges:
        if first < 0x10000:
            has_bmp = True
        if last >= 0x10000:
            has_non_bmp = True
        if first == last:
            if first > 127:
                lines.append(" characterClass->m_matchesUnicode.append(0x%04x);\n" % first)
            else:
                lines.append(" characterClass->m_matches.append(0x%02x);\n" % first)
        elif first > 127 or last > 127:
            lines.append(" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (first, last))
        else:
            lines.append(" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (first, last))

    width = _WIDTH_NAMES[int(has_non_bmp) * 2 + int(has_bmp)]
    lines.append(" characterClass->m_characterWidths = CharacterClassWidths::%s;\n" % width)
    lines.append(" return characterClass;\n")
    lines.append("}\n\n")
    return "".join(lines)


# Generate, for every class, its lookup table (only for table-backed classes
# that are not the inverse of another class) and its create function.
for name, classes in types.items():
    ranges = _normalize_ranges(classes["data"])
    if emitTables and classes["UseTable"] and "Inverse" not in classes:
        arrays += _build_table(name, ranges, entriesPerLine)
    # Generate createFunction:
    functions += _build_create_function(name, classes, ranges, bool(emitTables and classes["UseTable"]))
# Emit the generated source: to the file named by the last command-line
# argument when any argument is given, otherwise to standard output.
# NOTE: the last argument is always used as the path, whatever it is.
if len(sys.argv) > 1:
    path = sys.argv[-1]
    dirname = os.path.dirname(path)
    # dirname is "" for a bare filename, and os.makedirs("") raises; only
    # create the directory when there is one. exist_ok avoids failing when
    # another process creates it between the check and the call.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    # The context manager closes the file even if a write fails.
    with open(path, "w") as f:
        f.write(arrays)
        f.write(functions)
else:
    print(arrays)
    print(functions)