Tools/Scripts/webkitpy/common/checkout/changelog.py - WebKit - Git at Google

 # Copyright (C) 2009, Google Inc. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
 #
 #     * Redistributions of source code must retain the above copyright
 # notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above
 # copyright notice, this list of conditions and the following disclaimer
 # in the documentation and/or other materials provided with the
 # distribution.
 #     * Neither the name of Google Inc. nor the names of its
 # contributors may be used to endorse or promote products derived from
 # this software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # WebKit's Python module for parsing and modifying ChangeLog files

 import logging
 import re
 import textwrap

 from webkitpy.common.config.committers import CommitterList
 from webkitpy.common.system.filesystem import FileSystem
 from webkitpy.common.unicode_compatibility import StringIO, unicode
 import webkitpy.common.config.urls as config_urls

 _log = logging.getLogger(__name__)


 # FIXME: parse_bug_id_from_changelog should not be a free function.
 # Parse the bug ID out of a Changelog message based on the format that is
 # used by prepare-ChangeLog
 def parse_bug_id_from_changelog(message):
     if not message:
         return None
     match = re.search("^\s*" + config_urls.bug_url_short + "$", message, re.MULTILINE)
     if match:
         return int(match.group('bug_id'))
     match = re.search("^\s*" + config_urls.bug_url_long + "$", message, re.MULTILINE)
     if match:
         return int(match.group('bug_id'))
     # We weren't able to find a bug URL in the format used by prepare-ChangeLog. Fall back to the
     # first bug URL found anywhere in the message.
     return config_urls.parse_bug_id(message)


 class ChangeLogEntry(object):
     # e.g. 2009-06-03  Eric Seidel  <eric@webkit.org>
     date_line_regexp = r'^(?P<date>\d{4}-\d{2}-\d{2})\s+(?P<authors>(?P<name>[^<]+?)\s+<(?P<email>[^<>]+)>.*?)$'

     # e.g. * Source/WebCore/page/EventHandler.cpp: Implement FooBarQuux.
     touched_files_regexp = r'^\s*\*\s*(?P<file>[A-Za-z0-9_\-\./\\]+)\s*\:'
     # e.g. (ChangeLogEntry.touched_functions): Added.
     touched_functions_regexp = r'^\s*\((?P<function>[^)]*)\):'

     radar_id_regexp = r'^\s*(<?rdar://problems?/)?(?P<radar_id>-?\d{7,})>?'

     # e.g. Reviewed by Darin Adler.
     # (Discard everything after the first period to match more invalid lines.)
     reviewed_by_regexp = r'^\s*((\w+\s+)+and\s+)?(Review|Rubber(\s*|-)stamp)(s|ed)?\s+([a-z]+\s+)*?by\s+(?P<reviewer>.*?)[\.,]?\s*$'

     reviewed_byless_regexp = r'^\s*((Review|Rubber(\s*|-)stamp)(s|ed)?|RS)(\s+|\s*=\s*)(?P<reviewer>([A-Z]\w+\s*)+)[\.,]?\s*$'

     reviewer_name_noise_regexp = re.compile(r"""
     (\s+((tweaked\s+)?and\s+)?(landed|committed|okayed)\s+by.+) # "landed by", "commented by", etc...
     |(^(Reviewed\s+)?by\s+) # extra "Reviewed by" or "by"
     |([(<]\s*[\w_\-\.]+@[\w_\-\.]+[>)]) # email addresses
     |([(<](https?://?bugs.)webkit.org[^>)]+[>)]) # bug url
     |("[^"]+") # wresler names like 'Sean/Shawn/Shaun' in 'Geoffrey "Sean/Shawn/Shaun" Garen'
     |('[^']+') # wresler names like "The Belly" in "Sam 'The Belly' Weinig"
     |((Mr|Ms|Dr|Mrs|Prof)\.(\s+|$))
     """, re.IGNORECASE | re.VERBOSE)

     reviewer_name_casesensitive_noise_regexp = re.compile(r"""
     ((\s+|^)(and\s+)?([a-z-]+\s+){5,}by\s+) # e.g. "and given a good once-over by"
     |(\(\s*(?!(and|[A-Z])).+\)) # any parenthesis that doesn't start with "and" or a capital letter
     |(with(\s+[a-z-]+)+) # phrases with "with no hesitation" in "Sam Weinig with no hesitation"
     """, re.VERBOSE)

     reviewer_name_noise_needing_a_backreference_regexp = re.compile(r"""
     (\S\S)\.(?:(\s.+|$)) # Text after the two word characters (don't match initials) and a period followed by a space.
     """, re.IGNORECASE | re.VERBOSE)

     nobody_regexp = re.compile(r"""(\s+|^)nobody(
     ((,|\s+-)?\s+(\w+\s+)+fix.*) # e.g. nobody, build fix...
     |(\s*\([^)]+\).*) # NOBODY (..)...
     |$)""", re.IGNORECASE | re.VERBOSE)

     # e.g. == Rolled over to ChangeLog-2011-02-16 ==
     rolled_over_regexp = r'^== Rolled over to ChangeLog-\d{4}-\d{2}-\d{2} ==$'

     # e.g. git-svn-id: http://svn.webkit.org/repository/webkit/trunk@96161 268f45cc-cd09-0410-ab3c-d52691b4dbfc
     svn_id_regexp = r'git-svn-id: http://svn.webkit.org/repository/webkit/trunk@(?P<svnid>\d+) '

     split_names_regexp = r'\s*(?:,(?:\s+and\s+|&)?|(?:^|\s+)and\s+|&&|[/+&])\s*'

     def __init__(self, contents, committer_list=None, revision=None):
         self._contents = contents
         self._committer_list = committer_list or CommitterList()
         self._revision = revision
         self._parse_entry()

     @classmethod
     def _parse_radar_id(cls, text):
         if not text:
             return None
         match = re.search(ChangeLogEntry.radar_id_regexp, text, re.MULTILINE | re.IGNORECASE)
         if not match:
             return None
         radar_id = int(match.group('radar_id'))
         if radar_id < 0:
             return None

         return radar_id

     @classmethod
     def _parse_reviewer_text(cls, text):
         match = re.search(ChangeLogEntry.reviewed_by_regexp, text, re.MULTILINE | re.IGNORECASE)
         if not match:
             # There are cases where people omit "by". We match it only if reviewer part looked nice
             # in order to avoid matching random lines that start with Reviewed
             match = re.search(ChangeLogEntry.reviewed_byless_regexp, text, re.MULTILINE | re.IGNORECASE)
         if not match:
             return None, None

         reviewer_text = match.group("reviewer")

         reviewer_text = ChangeLogEntry.nobody_regexp.sub('', reviewer_text)
         reviewer_text = ChangeLogEntry.reviewer_name_noise_regexp.sub('', reviewer_text)
         reviewer_text = ChangeLogEntry.reviewer_name_casesensitive_noise_regexp.sub('', reviewer_text)
         reviewer_text = ChangeLogEntry.reviewer_name_noise_needing_a_backreference_regexp.sub(r'\1', reviewer_text)
         reviewer_text = reviewer_text.replace('(', '').replace(')', '')
         reviewer_text = re.sub(r'\s\s+|[,.]\s*$', ' ', reviewer_text).strip()
         if not len(reviewer_text):
             return None, None

         reviewer_list = ChangeLogEntry._split_reviewer_names(reviewer_text)

         # Get rid of "reviewers" like "even though this is just a..." in "Reviewed by Sam Weinig, even though this is just a..."
         # and "who wrote the original code" in "Noam Rosenthal, who wrote the original code"
         reviewer_list = [reviewer for reviewer in reviewer_list if not re.match('^who\s|^([a-z]+(\s+|\.|$)){6,}$', reviewer)]

         return reviewer_text, reviewer_list

     @classmethod
     def _split_reviewer_names(cls, text):
         return re.split(ChangeLogEntry.split_names_regexp, text)

     @classmethod
     def _split_author_names_with_emails(cls, text):
         regex = '>' + ChangeLogEntry.split_names_regexp
         names = re.split(regex, text)
         if len(names) > 1:
             names = [name + ">" for name in names[:-1]] + [names[-1]]
         return names

     def _fuzz_match_reviewers(self, reviewers_text_list):
         if not reviewers_text_list:
             return []
         list_of_reviewers = [self._committer_list.contributors_by_fuzzy_match(reviewer)[0] for reviewer in reviewers_text_list]
         # Flatten lists and get rid of any reviewers with more than one candidate.
         return [reviewers[0] for reviewers in list_of_reviewers if len(reviewers) == 1]

     @classmethod
     def _parse_author_name_and_email(cls, author_name_and_email):
         match = re.match(r'(?P<name>.+?)\s+<(?P<email>[^>]+)>', author_name_and_email)
         return {'name': match.group("name"), 'email': match.group("email")}

     @classmethod
     def _parse_author_text(cls, text):
         if not text:
             return []
         authors = cls._split_author_names_with_emails(text)
         assert(authors and len(authors) >= 1)
         return [cls._parse_author_name_and_email(author) for author in authors]

     @classmethod
     def _parse_touched_functions(cls, text):
         result = {}
         cur_file = None
         for line in text.splitlines():
             file_match = re.match(cls.touched_files_regexp, line)
             if file_match:
                 cur_file = file_match.group("file")
                 result[cur_file] = []
             func_match = re.match(cls.touched_functions_regexp, line)
             if func_match and cur_file:
                 result[cur_file].append(func_match.group("function"))
         return result

     @classmethod
     def _parse_bug_description(cls, text):
         # Line 3 is the bug description in most cases.
         lines = text.splitlines()
         if len(lines) < 3:
             return None
         found_reviewed = re.search(ChangeLogEntry.reviewed_by_regexp, lines[2], re.IGNORECASE)
         found_reviewed_byless = re.search(ChangeLogEntry.reviewed_byless_regexp, lines[2], re.IGNORECASE)
         found_url = parse_bug_id_from_changelog(lines[2])
         if found_reviewed or found_reviewed_byless or found_url:
             return None
         return lines[2].strip()

     def _parse_entry(self):
         match = re.match(self.date_line_regexp, self._contents, re.MULTILINE)
         if not match:
             _log.warning("Creating invalid ChangeLogEntry:\n%s" % self._contents)

         self._date_line = match.group()
         self._date = match.group("date")
         self._bug_description = self._parse_bug_description(self._contents)

         # FIXME: group("name") does not seem to be Unicode?  Probably due to self._contents not being unicode.
         self._author_text = match.group("authors") if match else None
         self._authors = ChangeLogEntry._parse_author_text(self._author_text)

         self._reviewer_text, self._reviewers_text_list = ChangeLogEntry._parse_reviewer_text(self._contents)
         self._reviewers = self._fuzz_match_reviewers(self._reviewers_text_list)
         self._author = self._committer_list.contributor_by_email(self.author_email()) or self._committer_list.contributor_by_name(self.author_name())

         self._touched_files = re.findall(self.touched_files_regexp, self._contents, re.MULTILINE)
         self._touched_functions = self._parse_touched_functions(self._contents)

     def date_line(self):
         return self._date_line

     def date(self):
         return self._date

     def author_text(self):
         return self._author_text

     def revision(self):
         return self._revision

     def author_name(self):
         return self._authors[0]['name']

     def author_email(self):
         return self._authors[0]['email']

     def author(self):
         return self._author  # Might be None

     def authors(self):
         return self._authors

     # FIXME: Eventually we would like to map reviwer names to reviewer objects.
     # See https://bugs.webkit.org/show_bug.cgi?id=26533
     def reviewer_text(self):
         return self._reviewer_text

     # Might be None, might also not be a Reviewer!
     def reviewer(self):
         return self._reviewers[0] if len(self._reviewers) > 0 else None

     def reviewers(self):
         return self._reviewers

     def has_valid_reviewer(self):
         if self._reviewers_text_list:
             for reviewer in self._reviewers_text_list:
                 reviewer = self._committer_list.reviewer_by_name(reviewer)
                 if reviewer:
                     return True
         return bool(re.search("unreviewed", self._contents, re.IGNORECASE))

     def contents(self):
         return self._contents

     def bug_id(self):
         return parse_bug_id_from_changelog(self._contents)

     def bug_description(self):
         return self._bug_description

     def touched_files(self):
         return self._touched_files

     # Returns a dict from file name to lists of function names.
     def touched_functions(self):
         return self._touched_functions

     def touched_files_text(self):
         match = re.search(self.touched_files_regexp, self._contents, re.MULTILINE)
         return self._contents[match.start():].lstrip("\n\r") if match else ""

     # Determine if any text has been added to the section on touched files
     def is_touched_files_text_clean(self):
         file_line_end = r"( (Added|Removed|(Copied|Renamed) from [A-Za-z0-9_\-./\\]+).)?$"
         for line in self.touched_files_text().splitlines():
             if re.match(self.touched_files_regexp + file_line_end, line):
                 continue
             if re.match(self.touched_functions_regexp + "$", line):
                 continue
             return False
         return True


 # FIXME: Various methods on ChangeLog should move into ChangeLogEntry instead.
 class ChangeLog(object):

     def __init__(self, path, filesystem=None):
         self.path = path
         self._filesystem = filesystem or FileSystem()

     _changelog_indent = " " * 8

     @classmethod
     def parse_latest_entry_from_file(cls, changelog_file):
         try:
             return next(cls.parse_entries_from_file(changelog_file))
         except StopIteration as e:
             return None

     svn_blame_regexp = re.compile(r'^(\s*(?P<revision>\d+) [^ ]+)\s*(?P<line>.*?\n)')

     @classmethod
     def _separate_revision_and_line(cls, line):
         match = cls.svn_blame_regexp.match(line)
         if not match:
             return None, line
         return int(match.group('revision')), match.group('line')

     @classmethod
     def parse_entries_from_file(cls, changelog_file):
         """changelog_file must be a file-like object which returns
         unicode strings, e.g. from StringIO(unicode()) or
         fs.open_text_file_for_reading()"""
         date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
         rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)

         # The first line should be a date line.
         revision, first_line = cls._separate_revision_and_line(changelog_file.readline())
         assert(isinstance(first_line, unicode))
         if not date_line_regexp.match(cls.svn_blame_regexp.sub('', first_line)):
             raise StopIteration

         entry_lines = [first_line]
         revisions_in_entry = {revision: 1} if revision != None else None
         for line in changelog_file:
             if revisions_in_entry:
                 revision, line = cls._separate_revision_and_line(line)

             if rolled_over_regexp.match(line):
                 break

             if date_line_regexp.match(line):
                 most_probable_revision = max(revisions_in_entry, key=revisions_in_entry.__getitem__) if revisions_in_entry else None
                 # Remove the extra newline at the end
                 yield ChangeLogEntry(''.join(entry_lines[:-1]), revision=most_probable_revision)
                 entry_lines = []
                 revisions_in_entry = {revision: 0}

             entry_lines.append(line)
             if revisions_in_entry:
                 revisions_in_entry[revision] = revisions_in_entry.get(revision, 0) + 1

         most_probable_revision = max(revisions_in_entry, key=revisions_in_entry.__getitem__) if revisions_in_entry else None
         yield ChangeLogEntry(''.join(entry_lines[:-1]), revision=most_probable_revision)

     def latest_entry(self):
         # ChangeLog files are always UTF-8, we read them in as such to support Reviewers with unicode in their names.
         changelog_file = self._filesystem.open_text_file_for_reading(self.path)
         try:
             return self.parse_latest_entry_from_file(changelog_file)
         finally:
             changelog_file.close()

     # _wrap_line and _wrap_lines exist to work around
     # http://bugs.python.org/issue1859

     def _wrap_line(self, line):
         return textwrap.fill(line,
                              width=70,
                              initial_indent=self._changelog_indent,
                              # Don't break urls which may be longer than width.
                              break_long_words=False,
                              subsequent_indent=self._changelog_indent)

     # Workaround as suggested by guido in
     # http://bugs.python.org/issue1859#msg60040

     def _wrap_lines(self, message):
         lines = [self._wrap_line(line) for line in message.splitlines()]
         return "\n".join(lines)

     def update_with_unreviewed_message(self, message):
         first_boilerplate_line_regexp = re.compile(
                 "%sNeed a short description \(OOPS!\)\." % self._changelog_indent)
         removing_boilerplate = False
         result = StringIO()
         with self._filesystem.open_text_file_for_reading(self.path) as file:
             for line in file:
                 if first_boilerplate_line_regexp.search(line):
                     message_lines = self._wrap_lines(message)
                     result.write(first_boilerplate_line_regexp.sub(message_lines, line))
                     # Remove all the ChangeLog boilerplate, except the first line (date, name, e-mail).
                     removing_boilerplate = True
                 elif removing_boilerplate:
                     if re.search("^[1-9]", line):  # each changelog entry is preceded by a date
                         removing_boilerplate = False

                 if not removing_boilerplate:
                     result.write(line)
         self._filesystem.write_text_file(self.path, result.getvalue())

     def set_reviewer(self, reviewer):
         latest_entry = self.latest_entry()
         latest_entry_contents = latest_entry.contents()
         reviewer_text = latest_entry.reviewer()
         found_nobody = re.search("NOBODY\s*\(OOPS!\)", latest_entry_contents, re.MULTILINE)
         found_reviewer_or_unreviewed = latest_entry.has_valid_reviewer()
         if not found_nobody and not found_reviewer_or_unreviewed and not reviewer_text:
             bug_url_number_of_items = len(re.findall(config_urls.bug_url_long, latest_entry_contents, re.MULTILINE))
             bug_url_number_of_items += len(re.findall(config_urls.bug_url_short, latest_entry_contents, re.MULTILINE))
             result = StringIO()
             with self._filesystem.open_text_file_for_reading(self.path) as file:
                 for line in file:
                     found_bug_url = re.search(config_urls.bug_url_long, line)
                     if not found_bug_url:
                         found_bug_url = re.search(config_urls.bug_url_short, line)
                     result.write(line)
                     if found_bug_url:
                         if bug_url_number_of_items == 1:
                             result.write("\n        Reviewed by %s.\n" % reviewer)
                         bug_url_number_of_items -= 1
             self._filesystem.write_text_file(self.path, result.getvalue())
         else:
             data = self._filesystem.read_text_file(self.path)
             newdata = data.replace("NOBODY (OOPS!)", reviewer)
             self._filesystem.write_text_file(self.path, newdata)

     def set_short_description_and_bug_url(self, short_description, bug_url):
         result = StringIO()
         with self._filesystem.open_text_file_for_reading(self.path) as file:
             short_description_placeholder = "Need a short description (OOPS!)."
             bug_url_placeholder = "Need the bug URL (OOPS!)."
             for line in file:
                 stripped = line.strip()
                 if stripped == short_description_placeholder:
                     line = self._changelog_indent + short_description + "\n"
                 if stripped == bug_url_placeholder:
                     line = self._changelog_indent + bug_url + "\n"
                 result.write(line)
         self._filesystem.write_text_file(self.path, result.getvalue())

     def delete_entries(self, num_entries):
         date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
         rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)
         entries = 0
         result = StringIO()
         with self._filesystem.open_text_file_for_reading(self.path) as file:
             for line in file:
                 if date_line_regexp.match(line):
                     entries += 1
                 elif rolled_over_regexp.match(line):
                     entries = num_entries + 1
                 if entries > num_entries:
                     result.write(line)
         self._filesystem.write_text_file(self.path, result.getvalue())

     def prepend_text(self, text):
         data = self._filesystem.read_text_file(self.path)
         self._filesystem.write_text_file(self.path, text + data)
	# Copyright (C) 2009, Google Inc. All rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# modification, are permitted provided that the following conditions are
	# met:
	#
	# * Redistributions of source code must retain the above copyright
	# notice, this list of conditions and the following disclaimer.
	# * Redistributions in binary form must reproduce the above
	# copyright notice, this list of conditions and the following disclaimer
	# in the documentation and/or other materials provided with the
	# distribution.
	# * Neither the name of Google Inc. nor the names of its
	# contributors may be used to endorse or promote products derived from
	# this software without specific prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	#
	# WebKit's Python module for parsing and modifying ChangeLog files

	import logging
	import re
	import textwrap

	from webkitpy.common.config.committers import CommitterList
	from webkitpy.common.system.filesystem import FileSystem
	from webkitpy.common.unicode_compatibility import StringIO, unicode
	import webkitpy.common.config.urls as config_urls

	_log = logging.getLogger(__name__)


	# FIXME: parse_bug_id_from_changelog should not be a free function.
	# Parse the bug ID out of a Changelog message based on the format that is
	# used by prepare-ChangeLog
	def parse_bug_id_from_changelog(message):
	if not message:
	return None
	match = re.search("^\s*" + config_urls.bug_url_short + "$", message, re.MULTILINE)
	if match:
	return int(match.group('bug_id'))
	match = re.search("^\s*" + config_urls.bug_url_long + "$", message, re.MULTILINE)
	if match:
	return int(match.group('bug_id'))
	# We weren't able to find a bug URL in the format used by prepare-ChangeLog. Fall back to the
	# first bug URL found anywhere in the message.
	return config_urls.parse_bug_id(message)


	class ChangeLogEntry(object):
	# e.g. 2009-06-03 Eric Seidel <eric@webkit.org>
	date_line_regexp = r'^(?P<date>\d{4}-\d{2}-\d{2})\s+(?P<authors>(?P<name>[^<]+?)\s+<(?P<email>[^<>]+)>.*?)$'

	# e.g. * Source/WebCore/page/EventHandler.cpp: Implement FooBarQuux.
	touched_files_regexp = r'^\s\\s(?P<file>[A-Za-z0-9_\-\./\\]+)\s\:'
	# e.g. (ChangeLogEntry.touched_functions): Added.
	touched_functions_regexp = r'^\s\((?P<function>[^)])\):'

	radar_id_regexp = r'^\s*(<?rdar://problems?/)?(?P<radar_id>-?\d{7,})>?'

	# e.g. Reviewed by Darin Adler.
	# (Discard everything after the first period to match more invalid lines.)
	reviewed_by_regexp = r'^\s((\w+\s+)+and\s+)?(Review\|Rubber(\s\|-)stamp)(s\|ed)?\s+([a-z]+\s+)?by\s+(?P<reviewer>.?)[\.,]?\s*$'

	reviewed_byless_regexp = r'^\s((Review\|Rubber(\s\|-)stamp)(s\|ed)?\|RS)(\s+\|\s=\s)(?P<reviewer>([A-Z]\w+\s)+)[\.,]?\s$'

	reviewer_name_noise_regexp = re.compile(r"""
	(\s+((tweaked\s+)?and\s+)?(landed\|committed\|okayed)\s+by.+) # "landed by", "commented by", etc...
	\|(^(Reviewed\s+)?by\s+) # extra "Reviewed by" or "by"
	\|([(<]\s*[\w_\-\.]+@[\w_\-\.]+[>)]) # email addresses
	\|([(<](https?://?bugs.)webkit.org[^>)]+[>)]) # bug url
	\|("[^"]+") # wresler names like 'Sean/Shawn/Shaun' in 'Geoffrey "Sean/Shawn/Shaun" Garen'
	\|('[^']+') # wresler names like "The Belly" in "Sam 'The Belly' Weinig"
	\|((Mr\|Ms\|Dr\|Mrs\|Prof)\.(\s+\|$))
	""", re.IGNORECASE \| re.VERBOSE)

	reviewer_name_casesensitive_noise_regexp = re.compile(r"""
	((\s+\|^)(and\s+)?([a-z-]+\s+){5,}by\s+) # e.g. "and given a good once-over by"
	\|(\(\s*(?!(and\|[A-Z])).+\)) # any parenthesis that doesn't start with "and" or a capital letter
	\|(with(\s+[a-z-]+)+) # phrases with "with no hesitation" in "Sam Weinig with no hesitation"
	""", re.VERBOSE)

	reviewer_name_noise_needing_a_backreference_regexp = re.compile(r"""
	(\S\S)\.(?:(\s.+\|$)) # Text after the two word characters (don't match initials) and a period followed by a space.
	""", re.IGNORECASE \| re.VERBOSE)

	nobody_regexp = re.compile(r"""(\s+\|^)nobody(
	((,\|\s+-)?\s+(\w+\s+)+fix.*) # e.g. nobody, build fix...
	\|(\s\([^)]+\).) # NOBODY (..)...
	\|$)""", re.IGNORECASE \| re.VERBOSE)

	# e.g. == Rolled over to ChangeLog-2011-02-16 ==
	rolled_over_regexp = r'^== Rolled over to ChangeLog-\d{4}-\d{2}-\d{2} ==$'

	# e.g. git-svn-id: http://svn.webkit.org/repository/webkit/trunk@96161 268f45cc-cd09-0410-ab3c-d52691b4dbfc
	svn_id_regexp = r'git-svn-id: http://svn.webkit.org/repository/webkit/trunk@(?P<svnid>\d+) '

	split_names_regexp = r'\s(?:,(?:\s+and\s+\|&)?\|(?:^\|\s+)and\s+\|&&\|[/+&])\s'

	def __init__(self, contents, committer_list=None, revision=None):
	self._contents = contents
	self._committer_list = committer_list or CommitterList()
	self._revision = revision
	self._parse_entry()

	@classmethod
	def _parse_radar_id(cls, text):
	if not text:
	return None
	match = re.search(ChangeLogEntry.radar_id_regexp, text, re.MULTILINE \| re.IGNORECASE)
	if not match:
	return None
	radar_id = int(match.group('radar_id'))
	if radar_id < 0:
	return None

	return radar_id

	@classmethod
	def _parse_reviewer_text(cls, text):
	match = re.search(ChangeLogEntry.reviewed_by_regexp, text, re.MULTILINE \| re.IGNORECASE)
	if not match:
	# There are cases where people omit "by". We match it only if reviewer part looked nice
	# in order to avoid matching random lines that start with Reviewed
	match = re.search(ChangeLogEntry.reviewed_byless_regexp, text, re.MULTILINE \| re.IGNORECASE)
	if not match:
	return None, None

	reviewer_text = match.group("reviewer")

	reviewer_text = ChangeLogEntry.nobody_regexp.sub('', reviewer_text)
	reviewer_text = ChangeLogEntry.reviewer_name_noise_regexp.sub('', reviewer_text)
	reviewer_text = ChangeLogEntry.reviewer_name_casesensitive_noise_regexp.sub('', reviewer_text)
	reviewer_text = ChangeLogEntry.reviewer_name_noise_needing_a_backreference_regexp.sub(r'\1', reviewer_text)
	reviewer_text = reviewer_text.replace('(', '').replace(')', '')
	reviewer_text = re.sub(r'\s\s+\|[,.]\s*$', ' ', reviewer_text).strip()
	if not len(reviewer_text):
	return None, None

	reviewer_list = ChangeLogEntry._split_reviewer_names(reviewer_text)

	# Get rid of "reviewers" like "even though this is just a..." in "Reviewed by Sam Weinig, even though this is just a..."
	# and "who wrote the original code" in "Noam Rosenthal, who wrote the original code"
	reviewer_list = [reviewer for reviewer in reviewer_list if not re.match('^who\s\|^([a-z]+(\s+\|\.\|$)){6,}$', reviewer)]

	return reviewer_text, reviewer_list

	@classmethod
	def _split_reviewer_names(cls, text):
	return re.split(ChangeLogEntry.split_names_regexp, text)

	@classmethod
	def _split_author_names_with_emails(cls, text):
	regex = '>' + ChangeLogEntry.split_names_regexp
	names = re.split(regex, text)
	if len(names) > 1:
	names = [name + ">" for name in names[:-1]] + [names[-1]]
	return names

	def _fuzz_match_reviewers(self, reviewers_text_list):
	if not reviewers_text_list:
	return []
	list_of_reviewers = [self._committer_list.contributors_by_fuzzy_match(reviewer)[0] for reviewer in reviewers_text_list]
	# Flatten lists and get rid of any reviewers with more than one candidate.
	return [reviewers[0] for reviewers in list_of_reviewers if len(reviewers) == 1]

	@classmethod
	def _parse_author_name_and_email(cls, author_name_and_email):
	match = re.match(r'(?P<name>.+?)\s+<(?P<email>[^>]+)>', author_name_and_email)
	return {'name': match.group("name"), 'email': match.group("email")}

	@classmethod
	def _parse_author_text(cls, text):
	if not text:
	return []
	authors = cls._split_author_names_with_emails(text)
	assert(authors and len(authors) >= 1)
	return [cls._parse_author_name_and_email(author) for author in authors]

	@classmethod
	def _parse_touched_functions(cls, text):
	result = {}
	cur_file = None
	for line in text.splitlines():
	file_match = re.match(cls.touched_files_regexp, line)
	if file_match:
	cur_file = file_match.group("file")
	result[cur_file] = []
	func_match = re.match(cls.touched_functions_regexp, line)
	if func_match and cur_file:
	result[cur_file].append(func_match.group("function"))
	return result

	@classmethod
	def _parse_bug_description(cls, text):
	# Line 3 is the bug description in most cases.
	lines = text.splitlines()
	if len(lines) < 3:
	return None
	found_reviewed = re.search(ChangeLogEntry.reviewed_by_regexp, lines[2], re.IGNORECASE)
	found_reviewed_byless = re.search(ChangeLogEntry.reviewed_byless_regexp, lines[2], re.IGNORECASE)
	found_url = parse_bug_id_from_changelog(lines[2])
	if found_reviewed or found_reviewed_byless or found_url:
	return None
	return lines[2].strip()

	def _parse_entry(self):
	match = re.match(self.date_line_regexp, self._contents, re.MULTILINE)
	if not match:
	_log.warning("Creating invalid ChangeLogEntry:\n%s" % self._contents)

	self._date_line = match.group()
	self._date = match.group("date")
	self._bug_description = self._parse_bug_description(self._contents)

	# FIXME: group("name") does not seem to be Unicode? Probably due to self._contents not being unicode.
	self._author_text = match.group("authors") if match else None
	self._authors = ChangeLogEntry._parse_author_text(self._author_text)

	self._reviewer_text, self._reviewers_text_list = ChangeLogEntry._parse_reviewer_text(self._contents)
	self._reviewers = self._fuzz_match_reviewers(self._reviewers_text_list)
	self._author = self._committer_list.contributor_by_email(self.author_email()) or self._committer_list.contributor_by_name(self.author_name())

	self._touched_files = re.findall(self.touched_files_regexp, self._contents, re.MULTILINE)
	self._touched_functions = self._parse_touched_functions(self._contents)

	def date_line(self):
	return self._date_line

	def date(self):
	return self._date

	def author_text(self):
	return self._author_text

	def revision(self):
	return self._revision

	def author_name(self):
	return self._authors[0]['name']

	def author_email(self):
	return self._authors[0]['email']

	def author(self):
	return self._author # Might be None

	def authors(self):
	return self._authors

	# FIXME: Eventually we would like to map reviwer names to reviewer objects.
	# See https://bugs.webkit.org/show_bug.cgi?id=26533
	def reviewer_text(self):
	return self._reviewer_text

	# Might be None, might also not be a Reviewer!
	def reviewer(self):
	return self._reviewers[0] if len(self._reviewers) > 0 else None

	def reviewers(self):
	return self._reviewers

	def has_valid_reviewer(self):
	if self._reviewers_text_list:
	for reviewer in self._reviewers_text_list:
	reviewer = self._committer_list.reviewer_by_name(reviewer)
	if reviewer:
	return True
	return bool(re.search("unreviewed", self._contents, re.IGNORECASE))

	def contents(self):
	return self._contents

	def bug_id(self):
	return parse_bug_id_from_changelog(self._contents)

	def bug_description(self):
	return self._bug_description

	def touched_files(self):
	return self._touched_files

	# Returns a dict from file name to lists of function names.
	def touched_functions(self):
	return self._touched_functions

	def touched_files_text(self):
	match = re.search(self.touched_files_regexp, self._contents, re.MULTILINE)
	return self._contents[match.start():].lstrip("\n\r") if match else ""

	# Determine if any text has been added to the section on touched files
	def is_touched_files_text_clean(self):
	file_line_end = r"( (Added\|Removed\|(Copied\|Renamed) from [A-Za-z0-9_\-./\\]+).)?$"
	for line in self.touched_files_text().splitlines():
	if re.match(self.touched_files_regexp + file_line_end, line):
	continue
	if re.match(self.touched_functions_regexp + "$", line):
	continue
	return False
	return True


	# FIXME: Various methods on ChangeLog should move into ChangeLogEntry instead.
	class ChangeLog(object):

	def __init__(self, path, filesystem=None):
	self.path = path
	self._filesystem = filesystem or FileSystem()

	_changelog_indent = " " * 8

	@classmethod
	def parse_latest_entry_from_file(cls, changelog_file):
	try:
	return next(cls.parse_entries_from_file(changelog_file))
	except StopIteration as e:
	return None

	svn_blame_regexp = re.compile(r'^(\s(?P<revision>\d+) [^ ]+)\s(?P<line>.*?\n)')

	@classmethod
	def _separate_revision_and_line(cls, line):
	match = cls.svn_blame_regexp.match(line)
	if not match:
	return None, line
	return int(match.group('revision')), match.group('line')

	@classmethod
	def parse_entries_from_file(cls, changelog_file):
	"""changelog_file must be a file-like object which returns
	unicode strings, e.g. from StringIO(unicode()) or
	fs.open_text_file_for_reading()"""
	date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
	rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)

	# The first line should be a date line.
	revision, first_line = cls._separate_revision_and_line(changelog_file.readline())
	assert(isinstance(first_line, unicode))
	if not date_line_regexp.match(cls.svn_blame_regexp.sub('', first_line)):
	raise StopIteration

	entry_lines = [first_line]
	revisions_in_entry = {revision: 1} if revision != None else None
	for line in changelog_file:
	if revisions_in_entry:
	revision, line = cls._separate_revision_and_line(line)

	if rolled_over_regexp.match(line):
	break

	if date_line_regexp.match(line):
	most_probable_revision = max(revisions_in_entry, key=revisions_in_entry.__getitem__) if revisions_in_entry else None
	# Remove the extra newline at the end
	yield ChangeLogEntry(''.join(entry_lines[:-1]), revision=most_probable_revision)
	entry_lines = []
	revisions_in_entry = {revision: 0}

	entry_lines.append(line)
	if revisions_in_entry:
	revisions_in_entry[revision] = revisions_in_entry.get(revision, 0) + 1

	most_probable_revision = max(revisions_in_entry, key=revisions_in_entry.__getitem__) if revisions_in_entry else None
	yield ChangeLogEntry(''.join(entry_lines[:-1]), revision=most_probable_revision)

	def latest_entry(self):
	# ChangeLog files are always UTF-8, we read them in as such to support Reviewers with unicode in their names.
	changelog_file = self._filesystem.open_text_file_for_reading(self.path)
	try:
	return self.parse_latest_entry_from_file(changelog_file)
	finally:
	changelog_file.close()

	# _wrap_line and _wrap_lines exist to work around
	# http://bugs.python.org/issue1859

	def _wrap_line(self, line):
	return textwrap.fill(line,
	width=70,
	initial_indent=self._changelog_indent,
	# Don't break urls which may be longer than width.
	break_long_words=False,
	subsequent_indent=self._changelog_indent)

	# Workaround as suggested by guido in
	# http://bugs.python.org/issue1859#msg60040

	def _wrap_lines(self, message):
	lines = [self._wrap_line(line) for line in message.splitlines()]
	return "\n".join(lines)

	def update_with_unreviewed_message(self, message):
	first_boilerplate_line_regexp = re.compile(
	"%sNeed a short description \(OOPS!\)\." % self._changelog_indent)
	removing_boilerplate = False
	result = StringIO()
	with self._filesystem.open_text_file_for_reading(self.path) as file:
	for line in file:
	if first_boilerplate_line_regexp.search(line):
	message_lines = self._wrap_lines(message)
	result.write(first_boilerplate_line_regexp.sub(message_lines, line))
	# Remove all the ChangeLog boilerplate, except the first line (date, name, e-mail).
	removing_boilerplate = True
	elif removing_boilerplate:
	if re.search("^[1-9]", line): # each changelog entry is preceded by a date
	removing_boilerplate = False

	if not removing_boilerplate:
	result.write(line)
	self._filesystem.write_text_file(self.path, result.getvalue())

	def set_reviewer(self, reviewer):
	latest_entry = self.latest_entry()
	latest_entry_contents = latest_entry.contents()
	reviewer_text = latest_entry.reviewer()
	found_nobody = re.search("NOBODY\s*\(OOPS!\)", latest_entry_contents, re.MULTILINE)
	found_reviewer_or_unreviewed = latest_entry.has_valid_reviewer()
	if not found_nobody and not found_reviewer_or_unreviewed and not reviewer_text:
	bug_url_number_of_items = len(re.findall(config_urls.bug_url_long, latest_entry_contents, re.MULTILINE))
	bug_url_number_of_items += len(re.findall(config_urls.bug_url_short, latest_entry_contents, re.MULTILINE))
	result = StringIO()
	with self._filesystem.open_text_file_for_reading(self.path) as file:
	for line in file:
	found_bug_url = re.search(config_urls.bug_url_long, line)
	if not found_bug_url:
	found_bug_url = re.search(config_urls.bug_url_short, line)
	result.write(line)
	if found_bug_url:
	if bug_url_number_of_items == 1:
	result.write("\n Reviewed by %s.\n" % reviewer)
	bug_url_number_of_items -= 1
	self._filesystem.write_text_file(self.path, result.getvalue())
	else:
	data = self._filesystem.read_text_file(self.path)
	newdata = data.replace("NOBODY (OOPS!)", reviewer)
	self._filesystem.write_text_file(self.path, newdata)

	def set_short_description_and_bug_url(self, short_description, bug_url):
	result = StringIO()
	with self._filesystem.open_text_file_for_reading(self.path) as file:
	short_description_placeholder = "Need a short description (OOPS!)."
	bug_url_placeholder = "Need the bug URL (OOPS!)."
	for line in file:
	stripped = line.strip()
	if stripped == short_description_placeholder:
	line = self._changelog_indent + short_description + "\n"
	if stripped == bug_url_placeholder:
	line = self._changelog_indent + bug_url + "\n"
	result.write(line)
	self._filesystem.write_text_file(self.path, result.getvalue())

	def delete_entries(self, num_entries):
	date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
	rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)
	entries = 0
	result = StringIO()
	with self._filesystem.open_text_file_for_reading(self.path) as file:
	for line in file:
	if date_line_regexp.match(line):
	entries += 1
	elif rolled_over_regexp.match(line):
	entries = num_entries + 1
	if entries > num_entries:
	result.write(line)
	self._filesystem.write_text_file(self.path, result.getvalue())

	def prepend_text(self, text):
	data = self._filesystem.read_text_file(self.path)
	self._filesystem.write_text_file(self.path, text + data)