# Copyright (C) 2015 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import math
import re
import sys


class BenchmarkResults(object):
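    """Aggregates benchmark results and formats them for display.

    The constructor takes a dictionary of test names to test dictionaries.
    A test dictionary may contain a 'metrics' dictionary, which maps each
    metric name (e.g. 'Time') either to a dictionary of configurations
    (e.g. 'current') holding lists (or lists of lists) of measured values,
    or to a list of aggregators (e.g. ['Total']) to be computed over the
    subtests listed under 'tests'. An illustrative input:

        {'PageLoad': {'metrics': {'Time': {'current': [1200, 1300, 1250]}}}}
    """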
    aggregators = {
        'Total': (lambda values: sum(values)),
        'Arithmetic': (lambda values: sum(values) / len(values)),
        'Geometric': (lambda values: math.exp(sum(map(math.log, values)) / len(values))),
    }
    metric_to_unit = {
        'FrameRate': 'fps',
        'Runs': '/s',
        'Time': 'ms',
        'Duration': 'ms',
        'Malloc': 'B',
        'Heap': 'B',
        'Allocations': 'B',
        'Score': 'pt',
    }
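    # Prefixes for 1000 ** -3 through 1000 ** 6; indexed by SI magnitude + 3.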
    SI_prefixes = ['n', 'u', 'm', '', 'K', 'M', 'G', 'T', 'P', 'E']

    def __init__(self, results):
        self._lint_results(results)
        self._results = self._aggregate_results(results)

    def format(self, scale_unit=True, show_iteration_values=False, max_depth=sys.maxsize):
        return self._format_tests(self._results, scale_unit, show_iteration_values, max_depth)

    @classmethod
    def _format_tests(cls, tests, scale_unit, show_iteration_values, max_depth, indent=''):
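        """Renders each test as 'name:metric:aggregator: value', one line per
        metric and aggregator, recursing into subtests up to max_depth with
        the subtest lines indented under their parent's name."""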
        output = ''
        config_name = 'current'
        for test_name in sorted(tests.keys()):
            is_first = True
            test = tests[test_name]
            metrics = test.get('metrics', {})
            for metric_name in sorted(metrics.keys()):
                metric = metrics[metric_name]
                for aggregator_name in sorted(metric.keys()):
                    output += indent
                    if is_first:
                        output += test_name
                        is_first = False
                    else:
                        output += ' ' * len(test_name)
                    output += ':' + metric_name + ':'
                    if aggregator_name:
                        output += aggregator_name + ':'
                    output += ' ' + cls._format_values(metric_name, metric[aggregator_name][config_name], scale_unit, show_iteration_values) + '\n'
            if 'tests' in test and max_depth > 1:
                output += cls._format_tests(test['tests'], scale_unit, show_iteration_values, max_depth - 1, indent=(indent + ' ' * len(test_name)))
        return output

    @classmethod
    def _format_values(cls, metric_name, values, scale_unit=True, show_iteration_values=False):
        values = list(map(float, values))
        total = sum(values)
        mean = total / len(values)
        square_sum = sum(map(lambda x: x * x, values))
        sample_count = len(values)

        # With sum and sum of squares, we can compute the sample standard deviation in O(1).
        # See https://rniwa.com/2012-11-10/sample-standard-deviation-in-terms-of-sum-and-square-sum-of-samples/
        if sample_count <= 1:
            sample_stdev = 0
        else:
            # Be careful about round-off error when sample_stdev is 0.
            sample_stdev = math.sqrt(max(0, square_sum / (sample_count - 1) - total * total / (sample_count - 1) / sample_count))

        unit = cls._unit_from_metric(metric_name)

        if not scale_unit:
            formatted_value = '{mean:.3f}{unit} stdev={delta:.1%}'.format(mean=mean, delta=sample_stdev / mean, unit=unit)
            if show_iteration_values:
                formatted_value += ' [' + ', '.join(map(lambda value: '{value:.3f}'.format(value=value), values)) + ']'
            return formatted_value

        if unit == 'ms':
            unit = 's'
            mean /= 1000
            values = [value / 1000 for value in values]
            sample_stdev /= 1000
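
        # Scale the mean into an SI-prefixed unit (base 1024 for bytes). Show
        # enough significant figures that the last printed digit is the first
        # one affected by the standard deviation; with zero deviation, fall
        # back to three significant figures.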
        base = 1024 if unit == 'B' else 1000
        value_sig_fig = 1 - math.floor(math.log10(sample_stdev / mean)) if sample_stdev else 3
        SI_magnitude = math.floor(math.log(mean, base))

        scaling_factor = math.pow(base, -SI_magnitude)
        scaled_mean = mean * scaling_factor
        SI_prefix = cls.SI_prefixes[int(SI_magnitude) + 3]

        non_floating_digits = 1 + math.floor(math.log10(scaled_mean))
        floating_points_count = max(0, value_sig_fig - non_floating_digits)

        def format_scaled(value):
            return ('{value:.' + str(int(floating_points_count)) + 'f}').format(value=value)

        formatted_value = '{mean}{prefix}{unit} stdev={delta:.1%}'.format(mean=format_scaled(scaled_mean), delta=sample_stdev / mean, prefix=SI_prefix, unit=unit)
        if show_iteration_values:
            formatted_value += ' [' + ', '.join(map(lambda value: format_scaled(value * scaling_factor), values)) + ']'
        return formatted_value

    @classmethod
    def _unit_from_metric(cls, metric_name):
        # FIXME: Detect unknown metric names.
        suffix = re.match(r'.*?([A-Z][a-z]+|FrameRate)$', metric_name)
        return cls.metric_to_unit[suffix.group(1)]

    @classmethod
    def _aggregate_results(cls, tests):
        results = {}
        for test_name, test in tests.items():
            results[test_name] = cls._aggregate_results_for_test(test)
        return results

    @classmethod
    def _aggregate_results_for_test(cls, test):
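        """Aggregates one test, recursing into its subtests first. A metric
        given as a list of aggregator names is computed per iteration from
        the subtests' values; a metric given as a dictionary of
        configurations has its nested value lists flattened."""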
        subtest_results = cls._aggregate_results(test['tests']) if 'tests' in test else {}
        results = {}
        for metric_name, metric in test.get('metrics', {}).items():
            if not isinstance(metric, list):
                results[metric_name] = {None: {}}
                for config_name, values in metric.items():
                    results[metric_name][None][config_name] = cls._flatten_list(values)
                continue

            aggregator_list = metric
            results[metric_name] = {}
            for aggregator in aggregator_list:
                values_by_config_iteration = cls._subtest_values_by_config_iteration(subtest_results, metric_name, aggregator)
                for config_name, values_by_iteration in values_by_config_iteration.items():
                    results[metric_name].setdefault(aggregator, {})
                    results[metric_name][aggregator][config_name] = [cls._aggregate_values(aggregator, values) for values in values_by_iteration]
        return {'metrics': results, 'tests': subtest_results}

    @classmethod
    def _flatten_list(cls, nested_list):
        flattened_list = []
        for item in nested_list:
            if isinstance(item, list):
                flattened_list += cls._flatten_list(item)
            else:
                flattened_list.append(item)
        return flattened_list

    @classmethod
    def _subtest_values_by_config_iteration(cls, subtest_results, metric_name, aggregator):
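        """Transposes the subtests' values into per-iteration groups: for each
        configuration, returns a list with one entry per iteration, where each
        entry collects that iteration's value from every subtest."""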
        values_by_config_iteration = {}
        for subtest_name, subtest in subtest_results.items():
            results_for_metric = subtest['metrics'].get(metric_name, {})
            if aggregator in results_for_metric:
                results_for_aggregator = results_for_metric.get(aggregator)
            elif None in results_for_metric:
                results_for_aggregator = results_for_metric.get(None)
            elif len(results_for_metric) == 1:
                results_for_aggregator = results_for_metric[next(iter(results_for_metric))]
            else:
                results_for_aggregator = {}
            for config_name, values in results_for_aggregator.items():
                values_by_config_iteration.setdefault(config_name, [[] for _ in values])
                for iteration, value in enumerate(values):
                    values_by_config_iteration[config_name][iteration].append(value)
        return values_by_config_iteration

    @classmethod
    def _aggregate_values(cls, aggregator, values):
        return cls.aggregators[aggregator](values)

    @classmethod
    def _lint_results(cls, tests):
        cls._lint_subtest_results(tests, None, None)
        return True

    @classmethod
    def _lint_subtest_results(cls, subtests, parent_test, parent_aggregator_list):
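        """Validates the shape of each subtest, raising TypeError on malformed
        input, and recurses into nested tests."""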
        iteration_groups_by_config = {}
        for test_name, test in subtests.items():
            aggregator_list = None
            if 'metrics' not in test and 'tests' not in test:
                raise TypeError('"%s" does not contain metrics or tests' % test_name)

            if 'metrics' in test:
                metrics = test['metrics']
                if not isinstance(metrics, dict):
                    raise TypeError('The metrics in "%s" is not a dictionary' % test_name)
                for metric_name, metric in metrics.items():
                    if isinstance(metric, list):
                        cls._lint_aggregator_list(test_name, metric_name, metric, parent_test, parent_aggregator_list)
                        aggregator_list = metric
                    elif isinstance(metric, dict):
                        cls._lint_configuration(test_name, metric_name, metric, parent_test, parent_aggregator_list, iteration_groups_by_config)
                    else:
                        raise TypeError('"%s" metric of "%s" is not an aggregator list or a dictionary of configurations: %s' % (metric_name, test_name, str(metric)))

            if 'tests' in test:
                cls._lint_subtest_results(test['tests'], test_name, aggregator_list)
            elif aggregator_list:
                raise TypeError('"%s" requires aggregation but has no subtests' % test_name)
        return iteration_groups_by_config

    @classmethod
    def _lint_aggregator_list(cls, test_name, metric_name, aggregator_list, parent_test, parent_aggregator_list):
        if len(aggregator_list) != len(set(aggregator_list)):
            raise TypeError('"%s" metric of "%s" had an invalid aggregator list: %s' % (metric_name, test_name, json.dumps(aggregator_list)))
        if not aggregator_list:
            raise TypeError('The aggregator list is empty in "%s" metric of "%s"' % (metric_name, test_name))
        for aggregator_name in aggregator_list:
            if cls._is_numeric(aggregator_name):
                raise TypeError('"%s" metric of "%s" is not wrapped by a configuration; e.g. "current"' % (metric_name, test_name))
            if aggregator_name not in cls.aggregators:
                raise TypeError('"%s" metric of "%s" uses an unknown aggregator: %s' % (metric_name, test_name, aggregator_name))
        if not parent_aggregator_list:
            return
        for parent_aggregator in parent_aggregator_list:
            if parent_aggregator not in aggregator_list and len(aggregator_list) > 1:
                raise TypeError('"%s" metric of "%s" has no value to aggregate as "%s" in the subtest "%s"' % (
                    metric_name, parent_test, parent_aggregator, test_name))

    @classmethod
    def _lint_configuration(cls, test_name, metric_name, configurations, parent_test, parent_aggregator_list, iteration_groups_by_config):
        # FIXME: Check that config_name is always "current".
        for config_name, values in configurations.items():
            nested_list_count = [isinstance(value, list) for value in values].count(True)
            if nested_list_count not in [0, len(values)]:
                raise TypeError('"%s" metric of "%s" had malformed values: %s' % (metric_name, test_name, json.dumps(values)))

            if nested_list_count:
                value_shape = []
                for value_group in values:
                    value_shape.append(len(value_group))
                    cls._lint_values(test_name, metric_name, value_group)
            else:
                value_shape = len(values)
                cls._lint_values(test_name, metric_name, values)

            iteration_groups_by_config.setdefault(metric_name, {}).setdefault(config_name, value_shape)
            if parent_aggregator_list and value_shape != iteration_groups_by_config[metric_name][config_name]:
                raise TypeError('"%s" metric of "%s" had mismatching subtest values' % (metric_name, parent_test))

    @classmethod
    def _lint_values(cls, test_name, metric_name, values):
        if any(not cls._is_numeric(value) for value in values):
            raise TypeError('"%s" metric of "%s" contains a non-numeric value: %s' % (metric_name, test_name, json.dumps(values)))

    @classmethod
    def _is_numeric(cls, value):
        return isinstance(value, (int, float))
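

# A minimal usage sketch, not part of the original module. The sample payload
# below is hypothetical, but its structure matches what _lint_results accepts.
if __name__ == '__main__':
    sample = {'PageLoad': {'metrics': {'Time': {'current': [1200, 1300, 1250]}}}}
    results = BenchmarkResults(sample)
    # Prints: PageLoad:Time: 1.25s stdev=4.0% [1.20, 1.30, 1.25]
    print(results.format(show_iteration_values=True))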