Tools/Scripts/report-non-inclusive-language - WebKit - Git at Google

 #!/usr/bin/env python
 #
 # Copyright (C) 2020 Apple Inc. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1.  Redistributions of source code must retain the above copyright
 #     notice, this list of conditions and the following disclaimer.
 # 2.  Redistributions in binary form must reproduce the above copyright
 #     notice, this list of conditions and the following disclaimer in the
 #     documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 # DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import argparse, os, re

 # For now: Searches all files in current directory tree.
 # Reads every file so it does take a while.
 # On my fast computer takes about 15 seconds for WebCore.

 # Things to do next:
 #
 #   Clever default for use on WebKit source tree instead of current directory by default.
 #   Report third party sources separately from WebKit project files.

 # Hard-coded for now, could read these out of a file instead.
 # Each entry is a two-item list: the term's display name, then the source of
 # the regular expression used to find it. Entries are lists rather than tuples
 # because later code in this script replaces the pattern with its compiled
 # form and appends counters to each entry.
 nonInclusiveTerms = [
     ['blacklist', 'black ?list'],
     ['whitelist', 'white ?list'],
     ['slave', 'slave'],
 ]

 # Directory names to skip: a directory is ignored when any component of its
 # path (relative to the starting directory) appears in this list.
 IGNORE_DIRECTORIES = ['.svn', '.git', 'autoinstalled', 'buildstream', 'node_modules']
 # File-name prefixes to skip; passed to str.startswith(). The trailing comma
 # is what makes this a one-element tuple instead of a parenthesized string,
 # so further prefixes can be added without changing its type.
 IGNORE_FILES_STARTING_WITH = ('ChangeLog',)
 # File-name suffixes to skip; passed to str.endswith().
 IGNORE_FILES_ENDING_WITH = ('.log', '.order', '.pyc', '.swp', '.xcuserstate', '.db', '.db-shm', '.db-wal')
 # Exact file names to skip.
 IGNORE_FILE_NAMES = ['report-non-inclusive-language', 'inclusive_language.py']

 # Command-line interface: a single on/off switch, --verbose / -v, whose
 # parsed value is read later as args.verbose.
 parser = argparse.ArgumentParser(
     description='Report counts and locations of non-inclusive terms.',
 )
 parser.add_argument('--verbose', '-v', action='store_true')
 args = parser.parse_args()

 # Expand every [name, pattern] entry in place to
 # [name, compiled case-insensitive regex, count for current file, running total].
 for entry in nonInclusiveTerms:
     entry[1] = re.compile(entry[1], re.IGNORECASE)
     entry.extend([0, 0])

 # Walk every file below the current directory and count, per term, the number
 # of lines that contain it. term[2] holds the count for the file being read
 # and term[3] the running total across all files.
 root = os.getcwd()
 for subroot, directories, files in os.walk(root):
     # Prune ignored directories in place so os.walk() never descends into
     # them, instead of walking them and discarding every visited path.
     directories[:] = [name for name in directories if name not in IGNORE_DIRECTORIES]
     for filename in files:
         if filename.startswith(IGNORE_FILES_STARTING_WITH):
             continue
         if filename.endswith(IGNORE_FILES_ENDING_WITH):
             continue
         if filename in IGNORE_FILE_NAMES:
             continue
         path = os.path.join(subroot, filename)
         if os.path.islink(path):
             continue
         # Read raw bytes and decode each line with replacement so binary or
         # non-UTF-8 files cannot abort the scan with a decoding error; the
         # with-statement guarantees the file is closed again.
         with open(path, 'rb') as handle:
             for raw_line in handle:
                 line = raw_line.decode('utf-8', 'replace')
                 for term in nonInclusiveTerms:
                     if term[1].search(line):
                         term[2] += 1
         for term in nonInclusiveTerms:
             if args.verbose and term[2] != 0:
                 # Report the path relative to the starting directory.
                 print(os.path.relpath(path, root) + ": " + term[0] + ": " + str(term[2]))
             # Fold the per-file count into the total and reset it for the next file.
             term[3] += term[2]
             term[2] = 0

 # Final summary line: the grand total followed by each term's own total.
 details = []
 total = 0
 for term in nonInclusiveTerms:
     details.append('{}: {}'.format(term[0], term[3]))
     total += term[3]
 print('Total: {} ({})'.format(total, ', '.join(details)))
	#!/usr/bin/env python
	#
	# Copyright (C) 2020 Apple Inc. All rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions
	# are met:
	# 1. Redistributions of source code must retain the above copyright
	# notice, this list of conditions and the following disclaimer.
	# 2. Redistributions in binary form must reproduce the above copyright
	# notice, this list of conditions and the following disclaimer in the
	# documentation and/or other materials provided with the distribution.
	#
	# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
	# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
	# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
	# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	import argparse, os, re

	# For now: Searches all files in current directory tree.
	# Reads every file so it does take a while.
	# On my fast computer takes about 15 seconds for WebCore.

	# Things to do next:
	#
	# Clever default for use on WebKit source tree instead of current directory by default.
	# Report third party sources separately from WebKit project files.

	# Hard-coded for now, could read these out of a file instead.
	# Each entry is a two-item list: the term's display name, then the source of
	# the regular expression used to find it. Entries are lists rather than tuples
	# because later code in this script replaces the pattern with its compiled
	# form and appends counters to each entry.
	nonInclusiveTerms = [
	    ['blacklist', 'black ?list'],
	    ['whitelist', 'white ?list'],
	    ['slave', 'slave'],
	]

	# Directory names to skip: a directory is ignored when any component of its
	# path (relative to the starting directory) appears in this list.
	IGNORE_DIRECTORIES = ['.svn', '.git', 'autoinstalled', 'buildstream', 'node_modules']
	# File-name prefixes to skip; passed to str.startswith(). The trailing comma
	# is what makes this a one-element tuple instead of a parenthesized string,
	# so further prefixes can be added without changing its type.
	IGNORE_FILES_STARTING_WITH = ('ChangeLog',)
	# File-name suffixes to skip; passed to str.endswith().
	IGNORE_FILES_ENDING_WITH = ('.log', '.order', '.pyc', '.swp', '.xcuserstate', '.db', '.db-shm', '.db-wal')
	# Exact file names to skip.
	IGNORE_FILE_NAMES = ['report-non-inclusive-language', 'inclusive_language.py']

	# Command-line interface: a single on/off switch, --verbose / -v, whose
	# parsed value is read later as args.verbose.
	parser = argparse.ArgumentParser(
	    description='Report counts and locations of non-inclusive terms.',
	)
	parser.add_argument('--verbose', '-v', action='store_true')
	args = parser.parse_args()

	# Expand every [name, pattern] entry in place to
	# [name, compiled case-insensitive regex, count for current file, running total].
	# The loop body is indented again here; without it the statement does not parse.
	for term in nonInclusiveTerms:
	    term[1] = re.compile(term[1], re.IGNORECASE)
	    term.append(0)
	    term.append(0)

	# Walk every file below the current directory and count, per term, the number
	# of lines that contain it. term[2] holds the count for the file being read
	# and term[3] the running total across all files.
	root = os.getcwd()
	for subroot, directories, files in os.walk(root):
	    # Prune ignored directories in place so os.walk() never descends into
	    # them, instead of walking them and discarding every visited path.
	    directories[:] = [name for name in directories if name not in IGNORE_DIRECTORIES]
	    for filename in files:
	        if filename.startswith(IGNORE_FILES_STARTING_WITH):
	            continue
	        if filename.endswith(IGNORE_FILES_ENDING_WITH):
	            continue
	        if filename in IGNORE_FILE_NAMES:
	            continue
	        path = os.path.join(subroot, filename)
	        if os.path.islink(path):
	            continue
	        # Read raw bytes and decode each line with replacement so binary or
	        # non-UTF-8 files cannot abort the scan with a decoding error; the
	        # with-statement guarantees the file is closed again.
	        with open(path, 'rb') as handle:
	            for raw_line in handle:
	                line = raw_line.decode('utf-8', 'replace')
	                for term in nonInclusiveTerms:
	                    if term[1].search(line):
	                        term[2] += 1
	        for term in nonInclusiveTerms:
	            if args.verbose and term[2] != 0:
	                # Report the path relative to the starting directory.
	                print(os.path.relpath(path, root) + ": " + term[0] + ": " + str(term[2]))
	            # Fold the per-file count into the total and reset it for the next file.
	            term[3] += term[2]
	            term[2] = 0

	# Final summary line: the grand total followed by each term's own total.
	details = []
	total = 0
	for term in nonInclusiveTerms:
	    details.append('{}: {}'.format(term[0], term[3]))
	    total += term[3]
	print('Total: {} ({})'.format(total, ', '.join(details)))