# Copyright (C) 2012, 2013 Apple Inc. All rights reserved.
# Copyright (C) 2012, 2013 Google Inc. All rights reserved.
# Copyright (C) 2012 Zoltan Horvath, Adobe Systems Incorporated. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import logging
import math
import re

from webkitpy.port.driver import DriverInput


DEFAULT_TEST_RUNNER_COUNT = 4

_log = logging.getLogger(__name__)


class PerfTestMetric(object):
    def __init__(self, path, test_file_name, metric, unit=None, aggregator=None, iterations=None):
        # FIXME: Fix runner.js to report correct metric names
        self._iterations = iterations or []
        self._unit = unit or self.metric_to_unit(metric)
        self._aggregator = aggregator
        self._metric = self.time_unit_to_metric(self._unit) if metric == 'Time' else metric
        self._path = path
        self._test_file_name = test_file_name

    def name(self):
        return self._metric

    def aggregator(self):
        return self._aggregator

    def path(self):
        return self._path

    def test_file_name(self):
        return self._test_file_name

    def has_values(self):
        return bool(self._iterations)

    def append_group(self, group_values):
        assert isinstance(group_values, list)
        self._iterations.append(group_values)

    def grouped_iteration_values(self):
        return self._iterations

    def flattened_iteration_values(self):
        return [value for group_values in self._iterations for value in group_values]

    def unit(self):
        return self._unit

    @staticmethod
    def metric_to_unit(metric):
        assert metric in ('Time', 'Malloc', 'JSHeap')
        return 'ms' if metric == 'Time' else 'bytes'

    @staticmethod
    def time_unit_to_metric(unit):
        return {'fps': 'FrameRate', 'runs/s': 'Runs', 'ms': 'Time'}[unit]
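
# Illustrative sketch of how PerfTestMetric accumulates values (the names and
# numbers below are hypothetical): groups are appended once per run and can be
# read back either grouped or flattened.
#
#   metric = PerfTestMetric(['Bindings', 'node-list-access'], 'node-list-access.html', 'Time')
#   metric.append_group([1.0, 2.0])
#   metric.append_group([3.0])
#   metric.grouped_iteration_values()    # [[1.0, 2.0], [3.0]]
#   metric.flattened_iteration_values()  # [1.0, 2.0, 3.0]
#   metric.unit()                        # 'ms' (derived from the 'Time' metric)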


class PerfTest(object):
    def __init__(self, port, test_name, test_path, test_runner_count=DEFAULT_TEST_RUNNER_COUNT):
        self._port = port
        self._test_name = test_name
        self._test_path = test_path
        self._description = None
        self._metrics = []
        self._test_runner_count = test_runner_count

    def test_name(self):
        return self._test_name

    def test_name_without_file_extension(self):
        return re.sub(r'\.\w+$', '', self.test_name())

    def test_path(self):
        return self._test_path

    def description(self):
        return self._description

    def prepare(self, time_out_ms):
        return True

    def _create_driver(self, no_timeout):
        return self._port.create_driver(worker_number=0, no_timeout=no_timeout)

    def run(self, time_out_ms, no_timeout=False):
        for _ in range(self._test_runner_count):
            driver = self._create_driver(no_timeout)
            try:
                if not self._run_with_driver(driver, time_out_ms):
                    return None
            finally:
                driver.stop()

        should_log = not self._port.get_option('profile')
        if should_log and self._description:
            _log.info('DESCRIPTION: %s' % self._description)

        results = []
        for subtest in self._metrics:
            for metric in subtest['metrics']:
                results.append(metric)
                if should_log and not subtest['name']:
                    legacy_chromium_bot_compatible_name = self.test_name_without_file_extension().replace('/', ': ')
                    self.log_statistics(legacy_chromium_bot_compatible_name + ': ' + metric.name(),
                        metric.flattened_iteration_values(), metric.unit())

        return results

    @staticmethod
    def log_statistics(test_name, values, unit):
        sorted_values = sorted(values)

        # Compute the mean and variance using Knuth's online algorithm (has good numerical stability).
        square_sum = 0
        mean = 0
        for i, time in enumerate(sorted_values):
            delta = time - mean
            sweep = i + 1.0
            mean += delta / sweep
            square_sum += delta * (time - mean)

        middle = len(sorted_values) // 2
        # The running mean accumulated above is mathematically the arithmetic mean
        # and is needed only for square_sum; report the directly computed mean.
        mean = sum(sorted_values) / len(values)
        median = sorted_values[middle] if len(sorted_values) % 2 else (sorted_values[middle - 1] + sorted_values[middle]) / 2
        stdev = math.sqrt(square_sum / (len(sorted_values) - 1)) if len(sorted_values) > 1 else 0

        _log.info('RESULT %s= %s %s' % (test_name, mean, unit))
        _log.info('median= {median} {unit}, stdev= {stdev} {unit}, min= {min} {unit}, max= {max} {unit}'.format(
            median=median,
            stdev=round(stdev, 10),
            min=sorted_values[0],
            max=sorted_values[-1],
            unit=unit,
        ))
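
    # For example (illustrative name and values), log_statistics('Parser: html5-full-render: Time', [1, 2, 3], 'ms')
    # logs something like:
    #   RESULT Parser: html5-full-render: Time= 2.0 ms
    #   median= 2 ms, stdev= 1.0 ms, min= 1 ms, max= 3 ms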

    _description_regex = re.compile(r'^Description: (?P<description>.*)$', re.IGNORECASE)
    _metrics_regex = re.compile(r'^(?P<subtest>[A-Za-z0-9\(\[].+?)?:(?P<metric>[A-Z][A-Za-z]+)(:(?P<aggregator>[A-Z][A-Za-z]+))? -> \[(?P<values>(\d+(\.\d+)?)(, \d+(\.\d+)?)+)\] (?P<unit>[a-z/]+)?$')
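    # Illustrative lines matched by the regexes above (subtest and aggregator are optional):
    #   Description: This benchmark measures foo.
    #   :Time -> [1080, 1095, 1120] ms
    #   EmberJS-TodoMVC:Time:Total -> [2062, 2109] ms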

    def _run_with_driver(self, driver, time_out_ms):
        output = self.run_single(driver, self.test_path(), time_out_ms)
        self._filter_output(output)
        if self.run_failed(output):
            return False

        for line in re.split('\n', output.text):
            description_match = self._description_regex.match(line)
            if description_match:
                self._description = description_match.group('description')
                continue

            metric_match = self._metrics_regex.match(line)
            if not metric_match:
                _log.error('ERROR: ' + line)
                return False

            metric = self._ensure_metrics(metric_match.group('metric'), metric_match.group('subtest'), metric_match.group('unit'), metric_match.group('aggregator'))
            metric.append_group([float(value) for value in metric_match.group('values').split(', ')])

        return True
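
    # Sketch of the resulting structure: after parsing the illustrative lines
    # shown above, self._metrics would look like
    #   [{'name': None, 'metrics': [<PerfTestMetric 'Time'>]},
    #    {'name': 'EmberJS-TodoMVC', 'metrics': [<PerfTestMetric 'Time', aggregator 'Total'>]}]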

    def _ensure_metrics(self, metric_name, subtest_name='', unit=None, aggregator=None):
        try:
            subtest = next(subtest for subtest in self._metrics if subtest['name'] == subtest_name)
        except StopIteration:
            subtest = {'name': subtest_name, 'metrics': []}
            self._metrics.append(subtest)

        try:
            return next(metric for metric in subtest['metrics'] if metric.name() == metric_name)
        except StopIteration:
            path = self.test_name_without_file_extension().split('/')
            if subtest_name:
                path += subtest_name.split('/')
            metric = PerfTestMetric(path, self._test_name, metric_name, unit, aggregator)
            subtest['metrics'].append(metric)
            return metric

    def run_single(self, driver, test_path, time_out_ms, should_run_pixel_test=False):
        return driver.run_test(DriverInput(test_path, time_out_ms, image_hash=None, should_run_pixel_test=should_run_pixel_test), stop_when_done=False)

    def run_failed(self, output):
        if output.text is None:
            pass
        elif output.error:
            _log.error('error: %s\n%s' % (self.test_name(), output.error))
        elif output.timeout:
            _log.error('timeout: %s' % self.test_name())
        elif output.crash:
            _log.error('crash: %s' % self.test_name())
        else:
            return False

        return True

    @staticmethod
    def _should_ignore_line(regexps, line):
        if not line:
            return True
        for regexp in regexps:
            if regexp.search(line):
                return True
        return False

    @staticmethod
    def filter_ignored_lines(regexps, text):
        lines = re.split('\n', text)
        filtered_lines = [line for line in lines if not PerfTest._should_ignore_line(regexps, line)]
        return '\n'.join(filtered_lines)

    _lines_to_ignore = [
        re.compile(r"^\s+$"),
        # The following handle existing tests like Dromaeo.
        re.compile(re.escape("""main frame - has 1 onunload handler(s)""")),
        re.compile('frame \"[^"]+\" - has \\d+ onunload handler\\(s\\)'),
        # The following is for html5.html.
        re.compile(re.escape("""Blocked access to external URL http://www.whatwg.org/specs/web-apps/current-work/""")),
        re.compile(r"CONSOLE MESSAGE: (line \d+: )?Blocked script execution in '[A-Za-z0-9\-\.:]+' because the document's frame is sandboxed and the 'allow-scripts' permission is not set."),
        re.compile(r"CONSOLE MESSAGE: (line \d+: )?Not allowed to load local resource"),
        # Speedometer 2.0
        re.compile(r'CONSOLE MESSAGE: (line \d+: )?DEBUG: -------------------------------'),
        re.compile(r'CONSOLE MESSAGE: (line \d+: )?DEBUG: Ember\s+: (\d\.)+'),
        re.compile(r'CONSOLE MESSAGE: (line \d+: )?DEBUG: jQuery\s+: (\d\.)+'),
        re.compile(r"CONSOLE MESSAGE: Consider using 'dppx' units instead of '.+"),
    ]

    _errors_to_ignore_in_sierra = [
        # CG (CoreGraphics) errors on macOS 10.12.6
        re.compile(r'WebKitTestRunner\[\d+\] <Error>: CGContext\w+: invalid context 0x0\. If you want to see the backtrace, please set CG_CONTEXT_SHOW_BACKTRACE environmental variable.'),
    ]

    def _filter_output(self, output):
        if output.text:
            output.text = self.filter_ignored_lines(self._lines_to_ignore, output.text)
        if output.error:
            if self._port.name().startswith('mac-sierra'):
                output.error = self.filter_ignored_lines(self._errors_to_ignore_in_sierra, output.error)


class SingleProcessPerfTest(PerfTest):
    def __init__(self, port, test_name, test_path, test_runner_count=1):
        super(SingleProcessPerfTest, self).__init__(port, test_name, test_path, test_runner_count)


class PerfTestFactory(object):
    _pattern_map = [
        (re.compile(r'^Dromaeo/'), SingleProcessPerfTest),
    ]

    @classmethod
    def create_perf_test(cls, port, test_name, path, test_runner_count=DEFAULT_TEST_RUNNER_COUNT):
        for (pattern, test_class) in cls._pattern_map:
            if pattern.match(test_name):
                return test_class(port, test_name, path, test_runner_count)
        return PerfTest(port, test_name, path, test_runner_count)
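

# Illustrative factory usage (a sketch: `port` is assumed to be a configured
# webkitpy Port object, and the paths are hypothetical):
#
#   test = PerfTestFactory.create_perf_test(port, 'Dromaeo/dom-attr.html', '/perf/Dromaeo/dom-attr.html')
#   # Names matching '^Dromaeo/' get a SingleProcessPerfTest; everything else a plain PerfTest.
#   results = test.run(time_out_ms=600 * 1000)  # list of PerfTestMetric, or None on failure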