| #! /usr/bin/env vpython3 |
| # |
| # Copyright 2021 The ANGLE Project Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| # |
| # run_perf_test.py: |
| # Runs ANGLE perf tests using some statistical averaging. |
| |
| import argparse |
| import fnmatch |
| import importlib |
| import io |
| import json |
| import logging |
| import time |
| import os |
| import re |
| import subprocess |
| import sys |
| |
| # Add //src/testing into sys.path for importing xvfb and test_env, and |
| # //src/testing/scripts for importing common. |
| d = os.path.dirname |
| THIS_DIR = d(os.path.abspath(__file__)) |
| ANGLE_DIR = d(d(THIS_DIR)) |
| sys.path.append(os.path.join(ANGLE_DIR, 'testing')) |
| sys.path.append(os.path.join(ANGLE_DIR, 'testing', 'scripts')) |
| |
| import common |
| import test_env |
| import xvfb |
| |
| sys.path.append(os.path.join(ANGLE_DIR, 'third_party', 'catapult', 'tracing')) |
| from tracing.value import histogram |
| from tracing.value import histogram_set |
| from tracing.value import merge_histograms |
| |
| DEFAULT_TEST_SUITE = 'angle_perftests' |
| DEFAULT_LOG = 'info' |
| DEFAULT_SAMPLES = 4 |
| DEFAULT_TRIALS = 3 |
| DEFAULT_MAX_ERRORS = 3 |
| DEFAULT_WARMUP_LOOPS = 2 |
| DEFAULT_CALIBRATION_TIME = 2 |
| |
| # Filters out stuff like: " I 72.572s run_tests_on_device(96071FFAZ00096) " |
ANDROID_LOGGING_PREFIX = r'I +\d+\.\d+s \w+\(\w+\) '
| |
| # Test expectations |
| FAIL = 'FAIL' |
| PASS = 'PASS' |
| SKIP = 'SKIP' |
| |
| EXIT_FAILURE = 1 |
| EXIT_SUCCESS = 0 |
| |
| |
| def is_windows(): |
| return sys.platform == 'cygwin' or sys.platform.startswith('win') |
| |
| |
| def get_binary_name(binary): |
| if is_windows(): |
| return '.\\%s.exe' % binary |
| else: |
| return './%s' % binary |
| |
| |
| def _popen(*args, **kwargs): |
| assert 'creationflags' not in kwargs |
| if sys.platform == 'win32': |
| # Necessary for signal handling. See crbug.com/733612#c6. |
| kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP |
| return subprocess.Popen(*args, **kwargs) |
| |
| |
| def run_command_with_output(argv, stdoutfile, env=None, cwd=None, log=True): |
| assert stdoutfile |
| with io.open(stdoutfile, 'wb') as writer: |
| process = _popen(argv, env=env, cwd=cwd, stdout=writer, stderr=subprocess.STDOUT) |
| test_env.forward_signals([process]) |
| while process.poll() is None: |
| # This sleep is needed for signal propagation. See the |
| # wait_with_signals() docstring. |
| time.sleep(0.1) |
| return process.returncode |
| |
| |
| def _run_and_get_output(args, cmd, env): |
| lines = [] |
| logging.debug(' '.join(cmd)) |
| with common.temporary_file() as tempfile_path: |
| if args.xvfb: |
| exit_code = xvfb.run_executable(cmd, env, stdoutfile=tempfile_path) |
| else: |
| exit_code = run_command_with_output(cmd, env=env, stdoutfile=tempfile_path, log=False) |
| with open(tempfile_path) as f: |
| for line in f: |
| lines.append(line.strip()) |
| return exit_code, lines |
| |
| |
| def _filter_tests(tests, pattern): |
| return [test for test in tests if fnmatch.fnmatch(test, pattern)] |
| |
| |
| def _shard_tests(tests, shard_count, shard_index): |
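    """Selects this shard's tests round-robin by index.

    For example, with tests ['a', 'b', 'c', 'd'] and shard_count=2, shard
    index 0 gets ['a', 'c'] and shard index 1 gets ['b', 'd'].
    """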
| return [tests[index] for index in range(shard_index, len(tests), shard_count)] |
| |
| |
| def _get_results_from_output(output, result): |
| output = '\n'.join(output) |
| m = re.search(r'Running (\d+) tests', output) |
| if m and int(m.group(1)) > 1: |
        raise Exception('Found more than one test in the output; expected exactly one')
| |
| # Results are reported in the format: |
| # name_backend.result: story= value units. |
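    # For example, with result='wall_time', a line such as
    #   FooPerfTest.wall_time: story= 0.25 ns
    # (test name and units illustrative) yields a captured value of 0.25.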
| pattern = r'\.' + result + r':.*= ([0-9.]+)' |
| logging.debug('Searching for %s in output' % pattern) |
| m = re.findall(pattern, output) |
| if not m: |
| logging.warning('Did not find the result "%s" in the test output:\n%s' % (result, output)) |
| return None |
| |
| return [float(value) for value in m] |
| |
| |
| def _get_tests_from_output(lines): |
| seen_start_of_tests = False |
| tests = [] |
| android_prefix = re.compile(ANDROID_LOGGING_PREFIX) |
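    # Expected output shape: the suite prints its test names between a
    # 'Tests list:' line and an 'End tests list.' line, one name per line,
    # possibly prefixed by Android logcat-style markers (stripped below).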
| logging.debug('Read %d lines from test output.' % len(lines)) |
| for line in lines: |
| line = android_prefix.sub('', line.strip()) |
| if line == 'Tests list:': |
| seen_start_of_tests = True |
| elif line == 'End tests list.': |
| break |
| elif seen_start_of_tests: |
| tests.append(line) |
| if not seen_start_of_tests: |
| raise Exception('Did not find test list in test output!') |
| logging.debug('Found %d tests from test output.' % len(tests)) |
| return tests |
| |
| |
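# Illustrative example: _truncated_list([9, 1, 7, 3, 5, 2], 1) sorts the data
# to [1, 2, 3, 5, 7, 9] and drops one value from each end, giving [2, 3, 5, 7].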
def _truncated_list(data, n):
    """Return a sorted copy of data with the n smallest and n largest values removed."""
    if n == 0:
        # A slice of [0:-0] would be empty, so handle "no truncation" explicitly.
        return sorted(data)
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]
| |
| |
| def _mean(data): |
| """Return the sample arithmetic mean of data.""" |
| n = len(data) |
| if n < 1: |
| raise ValueError('mean requires at least one data point') |
    return float(sum(data)) / float(n)
| |
| |
| def _sum_of_square_deviations(data, c): |
| """Return sum of square deviations of sequence data.""" |
| ss = sum((float(x) - c)**2 for x in data) |
| return ss |
| |
| |
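# Illustrative example: for data [2, 4] the mean is 3, the population variance
# is ((2 - 3)**2 + (4 - 3)**2) / 2 = 1, the standard deviation is 1, and the
# coefficient of variation is therefore 1 / 3 (about 0.33, i.e. 33%).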
| def _coefficient_of_variation(data): |
| """Calculates the population coefficient of variation.""" |
| n = len(data) |
| if n < 2: |
| raise ValueError('variance requires at least two data points') |
| c = _mean(data) |
| ss = _sum_of_square_deviations(data, c) |
| pvar = ss / n # the population variance |
| stddev = (pvar**0.5) # population standard deviation |
| return stddev / c |
| |
| |
| def _save_extra_output_files(args, results, histograms): |
| isolated_out_dir = os.path.dirname(args.isolated_script_test_output) |
| if not os.path.isdir(isolated_out_dir): |
| return |
| benchmark_path = os.path.join(isolated_out_dir, args.test_suite) |
| if not os.path.isdir(benchmark_path): |
| os.makedirs(benchmark_path) |
| test_output_path = os.path.join(benchmark_path, 'test_results.json') |
| results.save_to_json_file(test_output_path) |
| perf_output_path = os.path.join(benchmark_path, 'perf_results.json') |
| logging.info('Saving perf histograms to %s.' % perf_output_path) |
| with open(perf_output_path, 'w') as out_file: |
| out_file.write(json.dumps(histograms.AsDicts(), indent=2)) |
| |
| |
| class Results: |
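    """Accumulates per-test pass/fail/skip results.

    The dict below is shaped like the Chromium JSON Test Results format
    (version 3), which downstream merge scripts are expected to consume.
    """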
| |
| def __init__(self): |
| self._results = { |
| 'tests': {}, |
| 'interrupted': False, |
| 'seconds_since_epoch': time.time(), |
| 'path_delimiter': '.', |
| 'version': 3, |
| 'num_failures_by_type': { |
| FAIL: 0, |
| PASS: 0, |
| SKIP: 0, |
| }, |
| } |
| self._test_results = {} |
| |
| def has_result(self, test): |
| return test in self._test_results |
| |
| def result_skip(self, test): |
| self._test_results[test] = {'expected': SKIP, 'actual': SKIP} |
| self._results['num_failures_by_type'][SKIP] += 1 |
| |
| def result_pass(self, test): |
| self._test_results[test] = {'expected': PASS, 'actual': PASS} |
| self._results['num_failures_by_type'][PASS] += 1 |
| |
| def result_fail(self, test): |
| self._test_results[test] = {'expected': PASS, 'actual': FAIL, 'is_unexpected': True} |
| self._results['num_failures_by_type'][FAIL] += 1 |
| |
| def save_to_output_file(self, test_suite, fname): |
| self._update_results(test_suite) |
| with open(fname, 'w') as out_file: |
| out_file.write(json.dumps(self._results, indent=2)) |
| |
| def save_to_json_file(self, fname): |
| logging.info('Saving test results to %s.' % fname) |
| with open(fname, 'w') as out_file: |
| out_file.write(json.dumps(self._results, indent=2)) |
| |
| def _update_results(self, test_suite): |
| if self._test_results: |
| self._results['tests'][test_suite] = self._test_results |
| self._test_results = {} |
| |
| |
| def main(): |
| parser = argparse.ArgumentParser() |
| parser.add_argument('--isolated-script-test-output', type=str) |
| parser.add_argument('--isolated-script-test-perf-output', type=str) |
| parser.add_argument( |
| '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.') |
| parser.add_argument('--test-suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE) |
| parser.add_argument('--xvfb', help='Use xvfb.', action='store_true') |
| parser.add_argument( |
| '--shard-count', |
| help='Number of shards for test splitting. Default is 1.', |
| type=int, |
| default=1) |
| parser.add_argument( |
| '--shard-index', |
| help='Index of the current shard for test splitting. Default is 0.', |
| type=int, |
| default=0) |
| parser.add_argument( |
| '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG) |
| parser.add_argument( |
| '-s', |
| '--samples-per-test', |
| help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES, |
| type=int, |
| default=DEFAULT_SAMPLES) |
| parser.add_argument( |
| '-t', |
| '--trials-per-sample', |
| help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS, |
| type=int, |
| default=DEFAULT_TRIALS) |
| parser.add_argument( |
| '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int) |
| parser.add_argument( |
| '--max-errors', |
| help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS, |
| type=int, |
| default=DEFAULT_MAX_ERRORS) |
| parser.add_argument( |
| '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true') |
| parser.add_argument( |
| '--warmup-loops', |
| help='Number of warmup loops to run in the perf test. Default is %d.' % |
| DEFAULT_WARMUP_LOOPS, |
| type=int, |
| default=DEFAULT_WARMUP_LOOPS) |
| parser.add_argument( |
| '--calibration-time', |
        help='Time to spend in each calibration and warmup loop. Default is %d seconds.'
| % DEFAULT_CALIBRATION_TIME, |
| type=int, |
| default=DEFAULT_CALIBRATION_TIME) |
| |
| args, extra_flags = parser.parse_known_args() |
| |
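    # Reloading the logging module clears any configuration that imported
    # modules may have installed at import time, so the basicConfig call below
    # takes effect (presumed intent of the reload).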
| importlib.reload(logging) |
| logging.basicConfig(level=args.log.upper()) |
| |
| start_time = time.time() |
| |
| # Use fast execution for smoke test mode. |
| if args.smoke_test_mode: |
| args.steps_per_trial = 1 |
| args.trials_per_sample = 1 |
| args.samples_per_test = 1 |
| |
| env = os.environ.copy() |
| |
| # Get sharding args |
| if 'GTEST_TOTAL_SHARDS' in env and int(env['GTEST_TOTAL_SHARDS']) != 1: |
| if 'GTEST_SHARD_INDEX' not in env: |
| logging.error('Sharding params must be specified together.') |
| sys.exit(1) |
| args.shard_count = int(env.pop('GTEST_TOTAL_SHARDS')) |
| args.shard_index = int(env.pop('GTEST_SHARD_INDEX')) |
| |
| # Get test list |
| cmd = [get_binary_name(args.test_suite), '--list-tests', '--verbose'] |
| exit_code, lines = _run_and_get_output(args, cmd, env) |
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % '\n'.join(lines))
        return EXIT_FAILURE
    tests = _get_tests_from_output(lines)
| |
| if args.filter: |
| tests = _filter_tests(tests, args.filter) |
| |
| # Get tests for this shard (if using sharding args) |
| tests = _shard_tests(tests, args.shard_count, args.shard_index) |
| |
| num_tests = len(tests) |
| if num_tests == 0: |
| logging.error('No tests to run.') |
| return EXIT_FAILURE |
| |
    logging.info('Running %d test%s' % (num_tests, 's' if num_tests > 1 else ''))
| |
| # Run tests |
| results = Results() |
| |
| histograms = histogram_set.HistogramSet() |
| total_errors = 0 |
| |
| for test_index in range(num_tests): |
| test = tests[test_index] |
| cmd = [ |
| get_binary_name(args.test_suite), |
| '--gtest_filter=%s' % test, |
| '--extract-test-list-from-filter', |
| '--enable-device-cache', |
| '--skip-clear-data', |
| '--use-existing-test-data', |
| '--verbose', |
| '--calibration-time', |
| str(args.calibration_time), |
| ] |
| if args.steps_per_trial: |
| steps_per_trial = args.steps_per_trial |
| else: |
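            # No fixed --steps-per-trial was given, so run the suite once in
            # calibration mode and read back its reported 'steps_to_run' value.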
| cmd_calibrate = cmd + [ |
| '--calibration', |
| '--warmup-loops', |
| str(args.warmup_loops), |
| ] |
| exit_code, calibrate_output = _run_and_get_output(args, cmd_calibrate, env) |
| if exit_code != EXIT_SUCCESS: |
                logging.error('%s failed. Output:\n%s' %
                              (cmd_calibrate[0], '\n'.join(calibrate_output)))
| total_errors += 1 |
| results.result_fail(test) |
| continue |
| steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run') |
| if not steps_per_trial: |
| logging.warning('Skipping test %s' % test) |
| continue |
            assert len(steps_per_trial) == 1
| steps_per_trial = int(steps_per_trial[0]) |
| logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d steps_per_trial=%d)' % |
| (test_index + 1, num_tests, test, args.samples_per_test, |
| args.trials_per_sample, steps_per_trial)) |
| wall_times = [] |
| test_histogram_set = histogram_set.HistogramSet() |
| for sample in range(args.samples_per_test): |
| if total_errors >= args.max_errors: |
| logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors) |
| return EXIT_FAILURE |
| |
| cmd_run = cmd + [ |
| '--steps-per-trial', |
| str(steps_per_trial), |
| '--trials', |
| str(args.trials_per_sample), |
| ] |
| if args.smoke_test_mode: |
| cmd_run += ['--no-warmup'] |
| else: |
| cmd_run += ['--warmup-loops', str(args.warmup_loops)] |
| with common.temporary_file() as histogram_file_path: |
| cmd_run += ['--isolated-script-test-perf-output=%s' % histogram_file_path] |
| exit_code, output = _run_and_get_output(args, cmd_run, env) |
| if exit_code != EXIT_SUCCESS: |
| logging.error('%s failed. Output:\n%s' % (cmd_run[0], '\n'.join(output))) |
| results.result_fail(test) |
| total_errors += 1 |
| break |
| |
| sample_wall_times = _get_results_from_output(output, 'wall_time') |
| if not sample_wall_times: |
| # This can be intentional for skipped tests. They are handled below. |
| logging.warning('Test %s failed to produce a sample output' % test) |
| break |
| logging.info('Test %d/%d Sample %d/%d wall_times: %s' % |
| (test_index + 1, num_tests, sample + 1, args.samples_per_test, |
| str(sample_wall_times))) |
| wall_times += sample_wall_times |
| with open(histogram_file_path) as histogram_file: |
| sample_json = json.load(histogram_file) |
| sample_histogram = histogram_set.HistogramSet() |
| sample_histogram.ImportDicts(sample_json) |
| test_histogram_set.Merge(sample_histogram) |
| |
| if not results.has_result(test): |
| if not wall_times: |
| logging.warning('Skipping test %s. Assuming this is intentional.' % test) |
| results.result_skip(test) |
| elif len(wall_times) == (args.samples_per_test * args.trials_per_sample): |
| if len(wall_times) > 7: |
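                # len(wall_times) >> 3 is integer division by 8, i.e. up to one
                # eighth of the samples are trimmed from each end (rounded down).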
| truncation_n = len(wall_times) >> 3 |
| logging.debug( |
| 'Truncation: Removing the %d highest and lowest times from wall_times.' % |
| truncation_n) |
| wall_times = _truncated_list(wall_times, truncation_n) |
| |
| if len(wall_times) > 1: |
| logging.info('Test %d/%d: %s: truncated mean wall_time = %.2f, cov = %.2f%%' % |
| (test_index + 1, num_tests, test, _mean(wall_times), |
| (_coefficient_of_variation(wall_times) * 100.0))) |
| results.result_pass(test) |
| |
| # Merge the histogram set into one histogram |
| with common.temporary_file() as merge_histogram_path: |
| logging.info('Writing merged histograms to %s.' % merge_histogram_path) |
| with open(merge_histogram_path, 'w') as merge_histogram_file: |
| json.dump(test_histogram_set.AsDicts(), merge_histogram_file) |
| merge_histogram_file.close() |
| merged_dicts = merge_histograms.MergeHistograms( |
| merge_histogram_path, groupby=['name']) |
| merged_histogram = histogram_set.HistogramSet() |
| merged_histogram.ImportDicts(merged_dicts) |
| histograms.Merge(merged_histogram) |
| else: |
| logging.error('Test %s failed to record some samples' % test) |
| |
| if args.isolated_script_test_output: |
| results.save_to_output_file(args.test_suite, args.isolated_script_test_output) |
| |
| # Uses special output files to match the merge script. |
| _save_extra_output_files(args, results, histograms) |
| |
| if args.isolated_script_test_perf_output: |
| with open(args.isolated_script_test_perf_output, 'w') as out_file: |
| out_file.write(json.dumps(histograms.AsDicts(), indent=2)) |
| |
| end_time = time.time() |
| logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time)) |
| |
| return EXIT_SUCCESS |
| |
| |
| # This is not really a "script test" so does not need to manually add |
| # any additional compile targets. |
| def main_compile_targets(args): |
| json.dump([], args.output) |
| |
| |
| if __name__ == '__main__': |
| # Conform minimally to the protocol defined by ScriptTest. |
| if 'compile_targets' in sys.argv: |
| funcs = { |
| 'run': None, |
| 'compile_targets': main_compile_targets, |
| } |
| sys.exit(common.run_script(sys.argv[1:], funcs)) |
| sys.exit(main()) |