# Source blob: 0d63ff67c1453120169b4b8652b97fbd15aee6df
# Copyright (c) 2011, Apple Inc. All rights reserved.
# Copyright (c) 2009, 2011, 2012 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for committer and reviewer validation.
import fnmatch
import json
import sys
from functools import reduce
from webkitpy.common.editdistance import edit_distance
from webkitpy.common.memoized import memoized
from webkitpy.common.system.filesystem import FileSystem
from webkitpy.common.unicode_compatibility import encode_for, unicode
class Contributor(object):
    """A named project contributor with emails, IRC nicknames and aliases.

    Attributes:
        full_name: The contributor's name as passed to the constructor.
        emails: Lower-cased email addresses (emails are case-insensitive).
        irc_nicknames: List of IRC nicknames, or None/empty if unknown.
        aliases: List of alternative names, or None/empty if there are none.
        expertise: Free-form expertise value as passed in (may be None).
        can_commit / can_review / is_bot: Role flags; all False for a plain
            Contributor (subclasses flip them).
    """

    def __init__(self, name, email_or_emails, irc_nickname_or_nicknames=None, alias_or_aliases=None, expertise=None):
        """Create a contributor.

        Each of email_or_emails, irc_nickname_or_nicknames and
        alias_or_aliases may be a single ``str`` (wrapped into a one-element
        list) or a collection that is stored as given.

        Raises:
            AssertionError: if name or email_or_emails is empty/None.
        """
        assert name
        assert email_or_emails
        self.full_name = name

        if isinstance(email_or_emails, str):
            given_emails = [email_or_emails]
        else:
            given_emails = email_or_emails
        # Keep the spelling the caller used for serialization (as_dict), and a
        # lower-cased copy for lookups because emails are case-insensitive.
        self._case_preserved_emails = given_emails
        self.emails = [email.lower() for email in given_emails]

        if isinstance(irc_nickname_or_nicknames, str):
            self.irc_nicknames = [irc_nickname_or_nicknames]
        else:
            self.irc_nicknames = irc_nickname_or_nicknames

        if isinstance(alias_or_aliases, str):
            self.aliases = [alias_or_aliases]
        else:
            self.aliases = alias_or_aliases

        self.expertise = expertise
        self.can_commit = False
        self.can_review = False
        self.is_bot = False

    def bugzilla_email(self):
        """Return the first (lower-cased) email address."""
        # FIXME: We're assuming the first email is a valid bugzilla email,
        # which might not be right.
        return self.emails[0]

    def __str__(self):
        return encode_for(self.__unicode__(), str)

    def __unicode__(self):
        return u'"{}" <{}>'.format(unicode(self.full_name), unicode(self.emails[0]))

    @staticmethod
    def _xor_hash(values):
        """XOR-fold the hashes of values; None or an empty collection gives 0."""
        # list() lets tuples be hashed as well (tuple + list would raise), and
        # the trailing 0 gives reduce() something to return for empty input.
        return reduce(lambda a, b: hash(a) ^ hash(b), list(values or []) + [0])

    def __hash__(self):
        # Combines the same fields that __eq__ compares.
        return hash(self.full_name) \
            ^ self._xor_hash(self.emails) \
            ^ self._xor_hash(self._case_preserved_emails) \
            ^ self._xor_hash(self.irc_nicknames) \
            ^ hash(self.expertise) \
            ^ hash(self.can_commit) \
            ^ hash(self.can_review)

    def __eq__(self, other):
        # Returning NotImplemented (instead of touching attributes the other
        # object may not have) makes comparisons against None, strings, etc.
        # evaluate to "not equal" rather than raising AttributeError.
        if not isinstance(other, Contributor):
            return NotImplemented
        return (self.full_name == other.full_name
                and self.emails == other.emails
                and self._case_preserved_emails == other._case_preserved_emails
                and self.irc_nicknames == other.irc_nicknames
                and self.expertise == other.expertise
                and self.can_commit == other.can_commit
                and self.can_review == other.can_review)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so spell it out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def contains_string(self, search_string):
        """Return True if search_string occurs, case-insensitively, inside the
        full name, any IRC nickname, any alias or any email."""
        needle = search_string.lower()
        if needle in self.full_name.lower():
            return True
        if any(needle in nickname.lower() for nickname in self.irc_nicknames or []):
            return True
        if any(needle in alias.lower() for alias in self.aliases or []):
            return True
        # self.emails is already lower-cased.
        return any(needle in email for email in self.emails)

    def mentioned_in_text(self, text):
        """Return True if text contains, case-insensitively, the full name, an
        IRC nickname, an alias or an email of this contributor."""
        haystack = text.lower()
        if self.full_name.lower() in haystack:
            return True
        if any(nickname.lower() in haystack for nickname in self.irc_nicknames or []):
            return True
        if any(alias.lower() in haystack for alias in self.aliases or []):
            return True
        # self.emails is already lower-cased.
        return any(email in haystack for email in self.emails)

    def matches_glob(self, glob_string):
        """Return True if the full name, an IRC nickname or an email matches
        glob_string according to fnmatch.fnmatch. Aliases are not consulted."""
        if fnmatch.fnmatch(self.full_name, glob_string):
            return True
        if any(fnmatch.fnmatch(nickname, glob_string) for nickname in self.irc_nicknames or []):
            return True
        return any(fnmatch.fnmatch(email, glob_string) for email in self.emails)

    def as_dict(self):
        """Return a dict describing this contributor.

        "emails" is always present (in the caller's original case); "aliases",
        "status", "nicks", "expertise" and "class" are added only when the
        corresponding attribute is set.
        """
        info = {"emails": self._case_preserved_emails}
        if self.aliases:
            info["aliases"] = self.aliases
        # Reviewer wins over committer when both flags are set.
        if self.can_review:
            info["status"] = "reviewer"
        elif self.can_commit:
            info["status"] = "committer"
        if self.irc_nicknames:
            info["nicks"] = self.irc_nicknames
        if self.expertise:
            info["expertise"] = self.expertise
        if self.is_bot:
            info["class"] = "bot"
        return info
class Committer(Contributor):
    """A Contributor whose can_commit flag is set."""

    def __init__(self, name, email_or_emails, irc_nickname=None, alias_or_aliases=None, expertise=None):
        super(Committer, self).__init__(name, email_or_emails, irc_nickname, alias_or_aliases, expertise)
        self.can_commit = True
class Reviewer(Committer):
    """A Committer whose can_review flag is set as well."""

    def __init__(self, name, email_or_emails, irc_nickname=None, alias_or_aliases=None, expertise=None):
        super(Reviewer, self).__init__(name, email_or_emails, irc_nickname, alias_or_aliases, expertise)
        self.can_review = True
class Bot(Contributor):
    """A Contributor whose is_bot flag is set."""

    def __init__(self, name, email_or_emails, irc_nickname=None, alias_or_aliases=None, expertise=None):
        super(Bot, self).__init__(name, email_or_emails, irc_nickname, alias_or_aliases, expertise)
        self.is_bot = True
class CommitterList(object):
    """Collection of contributors, committers and reviewers with lookups.

    The lists come either from the arguments (intended for tests) or, when
    none are given, from contributors.json next to webkitpy.common.config.
    Name, email and login lookup tables are built lazily on first use.
    """

    # Committers and reviewers are passed in to allow easy testing
    def __init__(self,
                 committers=None,
                 reviewers=None,
                 contributors=None):
        """Build the list from the given people, or load contributors.json
        when all three arguments are empty or omitted."""
        committers = [] if committers is None else committers
        reviewers = [] if reviewers is None else reviewers
        contributors = [] if contributors is None else contributors
        if committers or reviewers or contributors:
            self.load_test_data(committers, reviewers, contributors)
        else:
            self.load_json()
        # Lazily-populated lookup caches; see the _*_map() helpers.
        self._contributors_by_name = {}
        self._accounts_by_email = {}
        self._accounts_by_login = {}

    @staticmethod
    def _contributors_json_path(filesystem):
        """Return the path of contributors.json beside webkitpy.common.config."""
        config_directory = filesystem.dirname(filesystem.path_to_module('webkitpy.common.config'))
        return filesystem.join(config_directory, 'contributors.json')

    def load_json(self):
        """Populate the lists from contributors.json.

        Exits the process via sys.exit() if the file is not valid JSON.
        """
        filesystem = FileSystem()
        json_path = self._contributors_json_path(filesystem)
        try:
            contributors = json.loads(filesystem.read_text_file(json_path))
        except ValueError as e:
            sys.exit('contributors.json is malformed: ' + str(e))

        self._contributors = []
        self._committers = []
        self._reviewers = []
        for name, data in contributors.items():
            status = data.get('status')
            # "status" takes precedence over "class" when picking the type.
            if status == "reviewer":
                contributor_class = Reviewer
            elif status == "committer":
                contributor_class = Committer
            elif data.get('class') == 'bot':
                contributor_class = Bot
            else:
                contributor_class = Contributor
            contributor = contributor_class(name, data.get('emails'), data.get('nicks'), data.get('aliases'), data.get('expertise'))

            # Reviewers are listed as committers too.
            if status == "reviewer":
                self._reviewers.append(contributor)
            if status in ("reviewer", "committer"):
                self._committers.append(contributor)
            self._contributors.append(contributor)

    def load_test_data(self, committers, reviewers, contributors):
        """Populate the lists directly from the given sequences."""
        self._contributors = contributors + committers + reviewers
        self._committers = committers + reviewers
        self._reviewers = reviewers

    @staticmethod
    def _contributor_list_to_dict(list):
        """Map each contributor's full_name to its as_dict() value.

        The parameter keeps its historical name (which shadows the builtin)
        so existing callers are unaffected.
        """
        # Sort on the name explicitly: Contributor defines no ordering, so a
        # bare sorted() raises TypeError on Python 3.
        ordered = sorted(list, key=lambda contributor: contributor.full_name)
        return {contributor.full_name: contributor.as_dict() for contributor in ordered}

    def as_json(self):
        """Return all contributors serialized as key-sorted, indented JSON."""
        result = CommitterList._contributor_list_to_dict(self._contributors)
        return json.dumps(result, sort_keys=True, indent=3, separators=(',', ' : '))

    def reformat_in_place(self):
        """Overwrite contributors.json with the output of as_json()."""
        filesystem = FileSystem()
        filesystem.write_text_file(self._contributors_json_path(filesystem), self.as_json())

    # Contributors who are not in any other category.
    def _exclusive_contributors(self):
        return [contributor for contributor in self._contributors
                if not (contributor.can_commit or contributor.can_review)]

    # Committers who are not reviewers.
    def _exclusive_committers(self):
        return [contributor for contributor in self._committers
                if contributor.can_commit and not contributor.can_review]

    # This is the superset of contributors + committers + reviewers
    def contributors(self):
        return self._contributors

    # This is the superset of committers + reviewers
    def committers(self):
        return self._committers

    def reviewers(self):
        return self._reviewers

    def _name_to_contributor_map(self):
        """Return (building on first use) a dict from lower-cased full names
        and aliases to contributors.

        Raises:
            AssertionError: on an empty name or a duplicate name/alias.
        """
        if not self._contributors_by_name:
            # Build into a local dict and publish it only once complete, so a
            # failed assertion never leaves a half-filled cache behind.
            by_name = {}
            for contributor in self._contributors:
                assert contributor.full_name
                key = contributor.full_name.lower()
                assert key not in by_name  # We should never have duplicate names.
                by_name[key] = contributor
                if contributor.aliases is None:
                    continue
                for alias in contributor.aliases:
                    assert alias.lower() not in by_name
                    by_name[alias.lower()] = contributor
            self._contributors_by_name = by_name
        return self._contributors_by_name

    def _email_to_account_map(self):
        """Return (building on first use) a dict from each email to its contributor.

        Raises:
            AssertionError: if two contributors share an email.
        """
        if not self._accounts_by_email:
            by_email = {}
            for account in self._contributors:
                for email in account.emails:
                    assert email not in by_email  # We should never have duplicate emails.
                    by_email[email] = account
            self._accounts_by_email = by_email
        return self._accounts_by_email

    def _login_to_account_map(self):
        """Return (building on first use) a dict from each contributor's
        bugzilla_email() to that contributor.

        Raises:
            AssertionError: if two contributors share a login.
        """
        if not self._accounts_by_login:
            by_login = {}
            for account in self._contributors:
                if account.emails:
                    login = account.bugzilla_email()
                    assert login not in by_login  # We should never have duplicate emails.
                    by_login[login] = account
            self._accounts_by_login = by_login
        return self._accounts_by_login

    def _committer_only(self, record):
        """Return record unless it is set and cannot commit, in which case None."""
        if record and not record.can_commit:
            return None
        return record

    def _reviewer_only(self, record):
        """Return record unless it is set and cannot review, in which case None."""
        if record and not record.can_review:
            return None
        return record

    def contributor_by_irc_nickname(self, irc_nickname):
        """Return the first contributor having irc_nickname, ignoring case,
        or None if there is no such contributor."""
        if not irc_nickname:
            return None
        wanted = irc_nickname.lower()
        for contributor in self.contributors():
            if any(nickname.lower() == wanted for nickname in contributor.irc_nicknames or []):
                return contributor
        return None

    def contributors_by_search_string(self, string):
        """Return the contributors matching string as a glob; if there are
        none, return those containing it as a substring instead."""
        glob_matches = [contributor for contributor in self.contributors() if contributor.matches_glob(string)]
        if glob_matches:
            return glob_matches
        return [contributor for contributor in self.contributors() if contributor.contains_string(string)]

    def contributors_by_email_username(self, string):
        """Return contributors with an email starting with ``string + '@'``.

        Each contributor appears at most once.
        """
        prefix = string + '@'
        return [contributor for contributor in self.contributors()
                if any(email.startswith(prefix) for email in contributor.emails)]

    def _contributor_name_shorthands(self, contributor):
        """Return first name, last name, first name + last initial, and
        "first name <space> last initial" for a multi-word full name; an
        empty list when the name contains no space."""
        if ' ' not in contributor.full_name:
            return []
        split_fullname = contributor.full_name.split()
        first_name = split_fullname[0]
        last_name = split_fullname[-1]
        return first_name, last_name, first_name + last_name[0], first_name + ' ' + last_name[0]

    def _tokenize_contributor_name(self, contributor):
        """Return lower-cased fuzzy-match tokens: the full name, each of its
        words, and every IRC nickname longer than five characters."""
        full_name_in_lowercase = contributor.full_name.lower()
        tokens = [full_name_in_lowercase] + full_name_in_lowercase.split()
        if contributor.irc_nicknames:
            return tokens + [nickname.lower() for nickname in contributor.irc_nicknames if len(nickname) > 5]
        return tokens

    def contributors_by_fuzzy_match(self, string):
        """Find the contributors that best match string.

        Returns:
            A ``(contributors, distance)`` tuple. distance is 0 for the exact
            matches of steps 1-3, otherwise the smallest edit distance found;
            when nothing qualifies the result is ``([], len(string))``.
        """
        string_in_lowercase = string.lower()

        # 1. Exact match for fullname, email and irc_nicknames
        account = (self.contributor_by_name(string_in_lowercase)
                   or self.contributor_by_email(string_in_lowercase)
                   or self.contributor_by_irc_nickname(string_in_lowercase))
        if account:
            return [account], 0

        # 2. Exact match for email username (before @)
        accounts = self.contributors_by_email_username(string_in_lowercase)
        if len(accounts) == 1:
            return accounts, 0

        # 3. Exact match for first name, last name, and first name + initial combinations such as "Dan B" and "Tim H"
        # (compared against the argument exactly as given, i.e. case-sensitively).
        accounts = [contributor for contributor in self.contributors() if string in self._contributor_name_shorthands(contributor)]
        if len(accounts) == 1:
            return accounts, 0

        # 4. Finally, fuzzy-match using edit-distance
        string = string_in_lowercase
        closest_contributors = []
        # Floor division keeps the threshold (and the returned distance) an
        # integer on Python 3 as well as Python 2.
        min_distance = len(string) // 2 - 1
        for contributor in self.contributors():
            tokens = self._tokenize_contributor_name(contributor)
            # Skip tokens whose length alone already rules them out.
            editdistances = [edit_distance(token, string) for token in tokens if abs(len(token) - len(string)) <= min_distance]
            if not editdistances:
                continue
            distance = min(editdistances)
            if distance == min_distance:
                closest_contributors.append(contributor)
            elif distance < min_distance:
                closest_contributors = [contributor]
                min_distance = distance
        if not closest_contributors:
            return [], len(string)
        return closest_contributors, min_distance

    def contributor_by_email(self, email):
        """Return the contributor owning email (case-insensitive), or None."""
        return self._email_to_account_map().get(email.lower()) if email else None

    def contributor_by_name(self, name):
        """Return the contributor whose full name or alias equals name
        (case-insensitive), or None."""
        return self._name_to_contributor_map().get(name.lower()) if name else None

    def committer_by_email(self, email):
        """Like contributor_by_email, but None unless the match can commit."""
        return self._committer_only(self.contributor_by_email(email))

    def committer_by_name(self, name):
        """Like contributor_by_name, but None unless the match can commit."""
        return self._committer_only(self.contributor_by_name(name))

    def reviewer_by_email(self, email):
        """Like contributor_by_email, but None unless the match can review."""
        return self._reviewer_only(self.contributor_by_email(email))

    def reviewer_by_name(self, email):
        """Like contributor_by_name, but None unless the match can review.

        Despite its name, the ``email`` parameter is a contributor name; the
        name is kept so keyword callers keep working.
        """
        return self._reviewer_only(self.contributor_by_name(email))