blob: 5e5ceee6e765221091a8c1489cbb9bec2bd1ced4 [file] [log] [blame]
#!/usr/bin/env python
#
# Copyright (C) 2020 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import io
import os
import re
# For now: searches all files in the current directory tree.
# Every file is read, so this takes a while:
# about 15 seconds for WebCore on a fast computer.
# Things to do next:
#
# - Pick a clever default for use on the WebKit source tree instead of always using the current directory.
# - Report third-party sources separately from WebKit project files.
# Hard-coded for now; these could be read out of a file instead.
# Each entry is a list: [term name used in the report, regular expression source].
# Entries must remain mutable lists: later in this script the expression is
# replaced by its compiled form and two counters are appended to each entry.
nonInclusiveTerms = [
    ["blacklist", "black ?list"],
    ["whitelist", "white ?list"],
    ["slave", "slave"],
]

# Directory names that are skipped wherever they occur below the scan root.
IGNORE_DIRECTORIES = ['.svn', '.git', 'autoinstalled', 'buildstream', 'node_modules']

# File-name prefixes and suffixes to skip. Both are tuples so they can be
# passed straight to str.startswith / str.endswith. Note the trailing comma:
# without it the parentheses are just grouping and the value is a plain string.
IGNORE_FILES_STARTING_WITH = ('ChangeLog',)
IGNORE_FILES_ENDING_WITH = ('.log', '.order', '.pyc', '.swp', '.xcuserstate', '.db', '.db-shm', '.db-wal')

# Exact file names to skip.
IGNORE_FILE_NAMES = ['report-non-inclusive-language', 'inclusive_language.py']
# Command-line interface: a single optional flag, --verbose / -v, which the
# scan loop consults to decide whether to print per-file counts.
parser = argparse.ArgumentParser(
    description='Report counts and locations of non-inclusive terms.',
)
parser.add_argument(
    '--verbose', '-v',
    action='store_true',
)
args = parser.parse_args()
# Prepare every term entry for scanning. Afterwards each entry has the layout
#   [name, compiled case-insensitive regex, per-file count, running total]
# with both counters starting at zero.
for term in nonInclusiveTerms:
    term[1] = re.compile(term[1], flags=re.IGNORECASE)
    term.extend((0, 0))
# Scan every file below the current directory and count, per term, the number
# of lines on which the term's regular expression matches. term[2] holds the
# count for the file being processed; term[3] accumulates the overall total.
root = os.getcwd()
for subroot, directories, files in os.walk(root):
    # Prune ignored directories in place so os.walk never descends into them.
    # This skips exactly the same files as filtering on the path components
    # afterwards, but avoids traversing large trees such as .git or
    # node_modules, and does not depend on '/' being the path separator.
    directories[:] = [name for name in directories if name not in IGNORE_DIRECTORIES]

    # Path of this directory relative to the scan root ('' for the root
    # itself); only used to label the verbose per-file output.
    prefix = '' if subroot == root else os.path.relpath(subroot, root)

    for filename in files:
        if filename.startswith(IGNORE_FILES_STARTING_WITH):
            continue
        if filename.endswith(IGNORE_FILES_ENDING_WITH):
            continue
        if filename in IGNORE_FILE_NAMES:
            continue

        path = os.path.join(subroot, filename)
        if os.path.islink(path):
            continue

        # io.open behaves the same on Python 2 and 3. Undecodable bytes are
        # replaced rather than raising, so binary or non-UTF-8 files cannot
        # abort the scan; the `with` block guarantees the handle is closed.
        with io.open(path, 'r', encoding='utf-8', errors='replace') as handle:
            # Iterate lazily instead of loading the whole file with readlines().
            for line in handle:
                for term in nonInclusiveTerms:
                    if term[1].search(line):
                        term[2] += 1

        if args.verbose:
            for term in nonInclusiveTerms:
                if term[2] != 0:
                    print(os.path.join(prefix, filename) + ": " + term[0] + ": " + str(term[2]))

        # Fold this file's counts into the running totals and reset the
        # per-file counters for the next file.
        for term in nonInclusiveTerms:
            term[3] += term[2]
            term[2] = 0
# Final summary: the grand total across all terms, followed by the
# per-term totals in the order the terms were declared.
details = []
total = 0
for term in nonInclusiveTerms:
    details.append('{}: {}'.format(term[0], term[3]))
    total += term[3]
print('Total: {} ({})'.format(total, ', '.join(details)))