| #!/usr/bin/env python |
| |
| # Copyright (c) 2009, Google Inc. All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: |
| # |
| # * Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # * Redistributions in binary form must reproduce the above |
| # copyright notice, this list of conditions and the following disclaimer |
| # in the documentation and/or other materials provided with the |
| # distribution. |
| # * Neither the name of Google Inc. nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| # |
| # Checks webkitpy's known list of committers (contributors.json) against lists.webkit.org and Git history. |
| |
| |
| import logging |
| import os |
| import subprocess |
| import re |
| import urllib2 |
| from datetime import date, datetime, timedelta |
| from optparse import OptionParser |
| |
| from webkitpy.common.config.committers import CommitterList |
| from webkitpy.common.checkout.changelog import ChangeLogEntry |
| from webkitpy.common.checkout.scm import Git |
| from webkitpy.common.net.bugzilla import Bugzilla |
| |
| # WebKit includes a built copy of BeautifulSoup in Scripts/webkitpy |
| # so this import should always succeed. |
| from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup |
| |
| # Module-level logger, named after this module. |
| _log = logging.getLogger(__name__) |
| |
def print_list_if_non_empty(title, list_to_print):
    """Print ``title`` followed by every entry of ``list_to_print``.

    A blank line is written first, then the title, then one entry per line.
    Nothing at all is printed when ``list_to_print`` is empty (or falsy).
    """
    if list_to_print:
        print("")  # Newline before the list
        print(title)
        for entry in list_to_print:
            print(entry)
| |
| |
class CommitterListFromMailingList(object):
    """Compares a committer list with the lists.webkit.org mailing-list rosters.

    The roster pages of the committers and reviewers lists are downloaded and
    checked in both directions against the committers/reviewers returned by
    the ``committer_list`` object handed to
    ``check_for_emails_missing_from_list``.
    """

    committers_list_url = "http://lists.webkit.org/mailman/roster/webkit-committers"
    reviewers_list_url = "http://lists.webkit.org/mailman/roster/webkit-reviewers"

    def _fetch_emails_from_page(self, url):
        """Download the roster page at ``url`` and return the addresses found on it.

        Returns a list with one de-obfuscated address per ``<li>`` element that
        contains a link with plain text.
        """
        page = urllib2.urlopen(url)
        try:
            # Read everything up front so the connection can be closed right away.
            markup = page.read()
        finally:
            page.close()
        soup = BeautifulSoup(markup)

        emails = []
        # The address is taken from the text of the first link inside each list item.
        for email_item in soup('li'):
            email_link = email_item.find("a")
            if email_link is None or email_link.string is None:
                # Not a subscriber entry in the expected shape; skip it instead of crashing.
                continue
            # The email is obfuscated using " at " instead of "@".
            emails.append(email_link.string.replace(" at ", "@"))
        return emails

    @staticmethod
    def _commiters_not_found_in_email_list(committers, emails):
        """Return the committers for which none of ``committer.emails`` is in ``emails``.

        A committer without any address is reported as missing as well.
        """
        subscribed = set(emails)  # O(1) membership tests instead of scanning a list.
        return [committer for committer in committers
                if not any(email in subscribed for email in committer.emails)]

    @staticmethod
    def _emails_not_found_in_committer_list(committers, emails):
        """Return the entries of ``emails`` (in order) that belong to no committer."""
        known_emails = set()
        for committer in committers:
            known_emails.update(committer.emails)
        return [email for email in emails if email not in known_emails]

    def check_for_emails_missing_from_list(self, committer_list):
        """Print every mismatch between ``committer_list`` and the two mailing lists.

        ``committer_list`` must provide ``committers()`` and ``reviewers()``.
        Empty result lists are not printed at all.
        """
        committers = committer_list.committers()
        reviewers = committer_list.reviewers()

        committer_emails = self._fetch_emails_from_page(self.committers_list_url)
        list_name = "webkit-committers@lists.webkit.org"

        missing_from_mailing_list = self._commiters_not_found_in_email_list(committers, committer_emails)
        print_list_if_non_empty("Committers missing from %s:" % list_name, missing_from_mailing_list)

        users_missing_from_committers = self._emails_not_found_in_committer_list(committers, committer_emails)
        print_list_if_non_empty("Subscribers to %s missing from contributors.json:" % list_name, users_missing_from_committers)

        reviewer_emails = self._fetch_emails_from_page(self.reviewers_list_url)
        list_name = "webkit-reviewers@lists.webkit.org"

        missing_from_mailing_list = self._commiters_not_found_in_email_list(reviewers, reviewer_emails)
        print_list_if_non_empty("Reviewers missing from %s:" % list_name, missing_from_mailing_list)

        missing_from_reviewers = self._emails_not_found_in_committer_list(reviewers, reviewer_emails)
        print_list_if_non_empty("Subscribers to %s missing from reviewers in contributors.json:" % list_name, missing_from_reviewers)

        missing_from_committers = self._emails_not_found_in_committer_list(committers, reviewer_emails)
        print_list_if_non_empty("Subscribers to %s completely missing from contributors.json:" % list_name, missing_from_committers)
| |
| |
class CommitterListFromGit(object):
    """Derives committer and reviewer activity from ``git log`` run in the current directory."""

    # Manual mapping from old login names (authors recorded without an e-mail
    # address) to the e-mail address they should be attributed to.
    login_to_email_address = {
        'aboule': 'aboule@apple.com',
        'adachan': 'adachan@apple.com',
        'adele': 'adele@apple.com',
        'aliceli1': 'alice.liu@apple.com',
        'alp': 'alp@nuanti.com',
        'andersca': 'andersca@apple.com',
        'antti': 'koivisto@iki.fi',
        'ap': 'ap@webkit.org',
        'aroben': 'aroben@webkit.org',
        'bdakin': 'bdakin@apple.com',
        'bdash': 'mrowe@apple.com',
        'bdibello': 'bdibello@apple.com',  # Bruce DiBello, only 4 commits: r10023, r9548, r9538, r9535
        'beidson': 'beidson@apple.com',
        'cblu': 'cblu@apple.com',
        'cpeterse': 'cpetersen@apple.com',
        'darin': 'darin@apple.com',
        'ddkilzer': 'ddkilzer@webkit.org',
        'dsmith': 'catfish.man@gmail.com',
        'eseidel': 'eric@webkit.org',
        'gdennis': 'gdennis@webkit.org',
        'ggaren': 'ggaren@apple.com',
        'goldsmit': 'goldsmit@apple.com',  # Debbie Goldsmith, only one commit r8839
        'gramps': 'gramps@apple.com',
        'harrison': 'harrison@apple.com',
        'hausmann': 'hausmann@webkit.org',
        'honeycutt': 'jhoneycutt@apple.com',
        'hyatt': 'hyatt@apple.com',
        'jdevalk': 'joost@webkit.org',
        'jens': 'jens@apple.com',
        'justing': 'justin.garcia@apple.com',
        'kali': 'kali@apple.com',  # Christy Warren, did BIDI work, 5 commits: r8815, r8802, r8801, r8791, r8773, r8603
        'kdecker': 'kdecker@apple.com',
        'kevino': 'kevino@theolliviers.com',
        'kjk': 'kkowalczyk@gmail.com',
        'kmccullo': 'kmccullough@apple.com',
        'kocienda': 'kocienda@apple.com',
        'lamadio': 'lamadio@apple.com',  # Lou Amadio, only 2 commits: r17949 and r17783
        'lars': 'lars@kde.org',
        'lweintraub': 'lweintraub@apple.com',
        'lypanov': 'lypanov@kde.org',
        'mhay': 'mhay@apple.com',  # Mike Hay, 3 commits: r3813, r2552, r2548
        'mitz': 'mitz@webkit.org',
        'mjs': 'mjs@apple.com',
        'oliver': 'oliver@apple.com',
        'ouch': 'ouch@apple.com',  # John Louch
        'pewtermoose': 'dev+webkit@mattlilek.com',
        'pyeh': 'patti@apple.com',  # Patti Yeh, did VoiceOver work in WebKit
        'rjw': 'rjw@apple.com',
        'rwlbuis': 'rwlbuis@gmail.com',
        'seangies': 'seangies@apple.com',  # Sean Gies?, only 5 commits: r16600, r16592, r16511, r16489, r16484
        'sfalken': 'sfalken@apple.com',
        'sheridan': 'sheridan@apple.com',  # Shelly Sheridan
        'slewis': 'slewis@apple.com',
        'staikos': 'staikos@kde.org',
        'sullivan': 'sullivan@apple.com',
        'thatcher': 'timothy@apple.com',
        'tomernic': 'timo@apple.com',
        'treat': 'manyoso@yahoo.com',
        'trey': 'trey@usa.net',
        'tristan': 'tristan@apple.com',
        'vicki': 'vicki@apple.com',
        'voas': 'voas@apple.com',  # Ed Voas, did some Carbon work in WebKit
        'weinig': 'sam@webkit.org',
        'zack': 'zack@kde.org',
        'zecke': 'zecke@selfish.org',
        'zimmermann': 'zimmermann@webkit.org',
    }

    def __init__(self):
        # Maps author -> timestamp (float) of that author's last commit; filled on first use.
        self._last_commit_time_by_author_cache = {}

    def _fetch_authors_and_last_commit_time_from_git_log(self):
        """Run ``git log`` and return a dict mapping each author to a commit timestamp.

        The log is read oldest-first (``--reverse``), so the value kept for an
        author is the timestamp of the last line seen for that author.  A line
        that does not match the expected format is logged and terminates the
        script with exit status 1.
        """
        last_commit_dates = {}
        git_log_args = ['git', 'log', '--reverse', '--pretty=format:%ae %at']
        process = subprocess.Popen(git_log_args, stdout=subprocess.PIPE)

        # eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc 1257090899
        line_regexp = re.compile(r"^(?P<author>.+)@\S+ (?P<timestamp>\d+)$")
        try:
            # Iterate until EOF rather than combining readline() with poll():
            # EOF can be observed before the child has been reaped, and the
            # resulting empty line must not be treated as a malformed entry.
            for output_line in process.stdout:
                match_result = line_regexp.match(output_line)
                if not match_result:
                    _log.error("Failed to match line: %s" % output_line)
                    raise SystemExit(1)
                last_commit_dates[match_result.group('author')] = float(match_result.group('timestamp'))
        finally:
            process.stdout.close()
            process.wait()
        return last_commit_dates

    def _fill_in_emails_for_old_logins(self):
        """Fold cache entries keyed by a bare login into the matching e-mail entry.

        The e-mail is looked up in ``login_to_email_address`` first, then among
        the cached authors whose address starts with ``<login>@``.  Logins with
        no known address are left in the cache untouched.
        """
        cache = self._last_commit_time_by_author_cache
        # Snapshot the bare logins first because entries are deleted below.
        authors_missing_email = [author for author in cache if '@' not in author]
        email_by_prefix = {}
        for author in cache:
            if '@' in author:
                # Keep the first address seen for a given prefix.
                email_by_prefix.setdefault(author.split('@')[0], author)

        for author in authors_missing_email:
            # First check to see if we have a manual mapping from login to email.
            # Most old logins like 'darin' are now just 'darin@apple.com', so fall
            # back to a prefix match if a manual mapping was not found.
            author_email = self.login_to_email_address.get(author) or email_by_prefix.get(author)
            if not author_email:
                # No known email mapping, likely not an active committer. We could log here.
                continue

            no_email_commit_time = cache.get(author)
            email_commit_time = cache.get(author_email)
            # We compare the timestamps for extra sanity even though we could assume commits
            # before email address were used for login are always going to be older.
            if not email_commit_time or email_commit_time < no_email_commit_time:
                cache[author_email] = no_email_commit_time
            del cache[author]

    def _last_commit_by_author(self):
        """Return the author -> last-commit-timestamp dict, building it on first use."""
        if not self._last_commit_time_by_author_cache:
            self._last_commit_time_by_author_cache = self._fetch_authors_and_last_commit_time_from_git_log()
            self._fill_in_emails_for_old_logins()
            # The initial svn import isn't very useful.  The entry may be absent, so don't insist on it.
            self._last_commit_time_by_author_cache.pop('(no author)', None)
        return self._last_commit_time_by_author_cache

    @staticmethod
    def _print_three_column_row(widths, values):
        """Print ``values[0..2]`` on one line, padding the first two to ``widths[0..1]``."""
        print("%s%s%s" % (values[0].ljust(widths[0]), values[1].ljust(widths[1]), values[2]))

    def possibly_expired_committers(self, committer_list):
        """Return ``(author, last_commit_timestamp)`` pairs older than 365 days.

        The result is ordered from the most recent to the oldest last commit.
        ``committer_list`` is accepted for interface symmetry and is not used.
        """
        committer_cutoff = date.today() - timedelta(days=365)
        authors_and_last_commits = sorted(self._last_commit_by_author().items(),
                                          key=lambda author_and_time: author_and_time[1],
                                          reverse=True)
        return [(author, last_commit) for (author, last_commit) in authors_and_last_commits
                if date.fromtimestamp(last_commit) < committer_cutoff]

    def possibly_inactive_reviewers(self, committer_list):
        """Return the reviewers not mentioned in any reviewer line of the last year's log.

        Commit messages from ``git log --since=1.year`` are searched with
        ``ChangeLogEntry.reviewed_by_regexp``; every reviewer for whom
        ``mentioned_in_text`` is true is dropped from the result.
        """
        git_log_args = ['git', 'log', '--since=1.year']
        process = subprocess.Popen(git_log_args, stdout=subprocess.PIPE)
        git_output, _ = process.communicate()

        comment_regex = re.compile(r'^Date: .+?\n+(.+?)(?:^commit |\Z)', re.MULTILINE | re.DOTALL)
        reviewed_by_regexp = re.compile(ChangeLogEntry.reviewed_by_regexp, re.MULTILINE)

        # Work on a copy so the list handed out by committer_list is never modified.
        reviewers = list(committer_list.reviewers())

        for comment in comment_regex.findall(git_output):
            if not reviewers:
                break  # Everybody has been seen already.
            reviewer_match = reviewed_by_regexp.search(comment)
            if not reviewer_match:
                continue
            # 'replace' is used because 'backslashreplace' is an encode-only
            # error handler on Python 2 and raises on undecodable bytes.
            reviewers_text = reviewer_match.group('reviewer').decode('utf-8', 'replace')
            # reviewers might be something like "Darin Adler and Dave Hyatt".
            # Rather than trying to fuzzy match names, drop every known reviewer mentioned in the text.
            reviewers = [reviewer for reviewer in reviewers
                         if not reviewer.mentioned_in_text(reviewers_text)]

        return reviewers

    def print_possibly_expired_committers(self, committer_list):
        """Print a table of the authors returned by ``possibly_expired_committers``."""
        retired_authors_and_last_commits = self.possibly_expired_committers(committer_list)
        column_widths = [13, 25]
        print("")
        print("Committers who have not committed within one year:")
        self._print_three_column_row(column_widths, ("Last Commit", "Committer Email", "Committer Record"))
        for (author, last_commit) in retired_authors_and_last_commits:
            committer_record = committer_list.committer_by_email(author)
            last_commit_date = date.fromtimestamp(last_commit)
            self._print_three_column_row(column_widths, (str(last_commit_date), author, committer_record))

    def print_possibly_inactive_reviewers(self, committer_list):
        """Print the name and bugzilla e-mail of every possibly inactive reviewer."""
        inactive_reviewers = self.possibly_inactive_reviewers(committer_list)

        print("")
        print("Reviewers who have not reviewed within one year:")
        for contributor in inactive_reviewers:
            print("\"{}\" {}".format(contributor.full_name.encode("utf-8"), contributor.bugzilla_email()))

    def print_committers_missing_from_committer_list(self, committer_list):
        """Print authors unknown to ``committer_list`` and committers without any commit."""
        last_commit_time_by_author = self._last_commit_by_author()
        missing_from_contributors_json = [author for author in last_commit_time_by_author
                                          if not committer_list.committer_by_email(author)]

        never_committed = [committer for committer in committer_list.committers()
                           if not any(last_commit_time_by_author.get(email) for email in committer.emails)]

        print_list_if_non_empty("Historical committers missing from contributors.json:", missing_from_contributors_json)
        print_list_if_non_empty("Committers in contributors.json who have never committed:", never_committed)
| |
| |
class CommitterListBugzillaChecker(object):
    """Lists committers whose bugzilla e-mail is flagged as invalid by the Bugzilla query helper."""

    def __init__(self):
        self._bugzilla = Bugzilla()

    def _has_invalid_bugzilla_email(self, committer):
        """Return what ``is_invalid_bugzilla_email`` reports for the committer's bugzilla e-mail."""
        bugzilla_email = committer.bugzilla_email()
        return self._bugzilla.queries.is_invalid_bugzilla_email(bugzilla_email)

    def print_committers_with_invalid_bugzilla_emails(self, committer_list):
        """Check every committer of ``committer_list`` and print the flagged ones."""
        print("")  # Print a newline before we start hitting bugzilla (it logs about logging in).
        print("Checking committer emails against bugzilla (this will take a long time)")
        committers_with_invalid_bugzilla_email = [
            committer for committer in committer_list.committers()
            if self._has_invalid_bugzilla_email(committer)
        ]
        print_list_if_non_empty("Committers with invalid bugzilla email:", committers_with_invalid_bugzilla_email)
| |
| |
def dump_mailmap(committer_list):
    """Print a Git mailmap for every contributor of ``committer_list``.

    For each address of a contributor (plus the legacy login mapped to one of
    those addresses in ``CommitterListFromGit.login_to_email_address``, if any)
    two lines are printed: one for the plain address and one for the address
    with the git-svn repository UUID appended.  Both map to the contributor's
    bugzilla e-mail.
    """
    def format_email(email):
        return "<{0}>".format(email)

    def format_email_with_gitsvn_uuid(email):
        return "<{0}@268f45cc-cd09-0410-ab3c-d52691b4dbfc>".format(email)

    # Reverse lookup: e-mail address -> legacy login name.
    email_to_legacy_username = dict(
        (email, login) for (login, email) in CommitterListFromGit.login_to_email_address.items())

    def map_emails_to_legacy_username(emails):
        # Return the first truthy legacy login found for any of the addresses, else None.
        for email in emails:
            legacy_username = email_to_legacy_username.get(email)
            if legacy_username:
                return legacy_username
        return None

    for contributor in committer_list.contributors():
        full_name = contributor.full_name.encode("utf-8")
        canonical_email = format_email(contributor.bugzilla_email())
        # Copy the list: appending the legacy login must not modify the contributor itself.
        other_emails = list(contributor.emails)
        legacy_username = map_emails_to_legacy_username(other_emails)
        if legacy_username:
            other_emails.append(legacy_username)

        for other_email in other_emails:
            print("%s %s %s" % (full_name, canonical_email, format_email(other_email)))
            print("%s %s %s" % (full_name, canonical_email, format_email_with_gitsvn_uuid(other_email)))
| |
| |
def main():
    """Parse the command line and run the requested checks.

    Returns 0 on success and 1 when the history-based checks had to be
    skipped because the current directory is not a git checkout.
    """
    parser = OptionParser()
    parser.add_option("-b", "--check-bugzilla-emails", action="store_true", help="Check the bugzilla_email for each committer against bugs.webkit.org")
    parser.add_option("-d", "--dump", action="store_true", help="Dump the contributor list as JSON to stdout (suitable for saving to contributors.json)")
    parser.add_option("--dump-mailmap", action="store_true", help="Dump the contributor list as a Git Mailmap to stdout")
    parser.add_option("-c", "--canonicalize", action="store_true", help="Canonicalize contributors.json, rewriting it in-place")

    options, _ = parser.parse_args()

    committer_list = CommitterList()

    # The three options below are stand-alone modes: each one runs and returns.
    if options.dump:
        print(committer_list.as_json())
        return 0

    if options.dump_mailmap:
        dump_mailmap(committer_list)
        return 0

    if options.canonicalize:
        print("Updating contributors.json in-place...")
        committer_list.reformat_in_place()
        print("Done")
        return 0

    CommitterListFromMailingList().check_for_emails_missing_from_list(committer_list)

    if not Git.in_working_directory("."):
        print("""\n\nWARNING: validate-committer-lists requires a git checkout.
The following checks are disabled:
- List of inactive committers
- List of inactive reviewers
- List of historical committers missing from contributors.json
""")
        return 1

    svn_committer_list = CommitterListFromGit()
    svn_committer_list.print_possibly_expired_committers(committer_list)
    svn_committer_list.print_possibly_inactive_reviewers(committer_list)
    svn_committer_list.print_committers_missing_from_committer_list(committer_list)

    if options.check_bugzilla_emails:
        CommitterListBugzillaChecker().print_committers_with_invalid_bugzilla_emails(committer_list)

    # Report success explicitly, like every other successful path above.
    return 0
| |
| |
if __name__ == "__main__":
    # Use main()'s return value as the process exit status (None counts as success).
    raise SystemExit(main())