Blame - PerformanceTests/JSBench/harness.py - WebKit

blob: fd30cc8c5d26bfb0f211a5793813741fa266510a [file] [log] [blame]

keith_miller@apple.com	a8f138f	2016-05-24 19:00:51 +0000	[diff] [blame]	1	#!/usr/bin/env python
				2	# Copyright (C) 2011, 2012 Purdue University
				3	# Written by Gregor Richards
				4	# All rights reserved.
				5	#
				6	# Redistribution and use in source and binary forms, with or without
				7	# modification, are permitted provided that the following conditions are met:
				8	#
				9	# 1. Redistributions of source code must retain the above copyright notice,
				10	# this list of conditions and the following disclaimer.
				11	# 2. Redistributions in binary form must reproduce the above copyright notice,
				12	# this list of conditions and the following disclaimer in the documentation
				13	# and/or other materials provided with the distribution.
				14	#
				15	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				16	# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				17	# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				18	# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
				19	# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				20	# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				21	# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				22	# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				23	# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				24	# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				25	# POSSIBILITY OF SUCH DAMAGE.
				26
				27	import math
				28	import os
				29	import re
				30	import sys
				31
				32	benchmarks = ["amazon/chrome", "amazon/firefox", "amazon/safari",
				33	"facebook/chrome", "facebook/firefox", "facebook/safari",
				34	"google/chrome", "google/firefox", "google/safari",
				35	"twitter/chrome", "twitter/firefox", "twitter/safari",
				36	"yahoo/chrome", "yahoo/firefox", "yahoo/safari"]
				37	modes = {
				38	"*": ["urem"],
				39	"amazon/firefox": ["urm"],
				40	"google/firefox": ["uem"]
				41	}
				42	runcount = 25
				43	keepruns = 20
				44
				45	keepfrom = runcount - keepruns
				46
				47	if len(sys.argv) != 2:
annulen@yandex.ru	70acd2e	2017-12-08 21:56:09 +0000	[diff] [blame]	48	print("Use: python harness.py <JS executable>")
keith_miller@apple.com	a8f138f	2016-05-24 19:00:51 +0000	[diff] [blame]	49	exit(1)
				50	js = sys.argv[1]
				51
				52	# standard t-distribution for normally distributed samples
				53	tDistribution = [0, 0, 12.71, 4.30, 3.18, 2.78, 2.57, 2.45, 2.36, 2.31, 2.26,
				54	2.23, 2.20, 2.18, 2.16, 2.14, 2.13, 2.12, 2.11, 2.10, 2.09, 2.09, 2.08, 2.07,
				55	2.07, 2.06, 2.06, 2.06, 2.05, 2.05, 2.05, 2.04, 2.04, 2.04, 2.03, 2.03, 2.03,
				56	2.03, 2.03, 2.02, 2.02, 2.02, 2.02, 2.02, 2.02, 2.02, 2.01, 2.01, 2.01, 2.01,
				57	2.01, 2.01, 2.01, 2.01, 2.01, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00,
				58	2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99,
				59	1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99,
				60	1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.98, 1.98, 1.98, 1.98, 1.98,
				61	1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
				62	1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
				63	1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
				64	1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
				65	1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				66	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				67	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				68	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				69	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				70	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				71	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				72	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				73	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				74	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				75	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				76	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				77	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				78	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				79	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				80	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				81	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				82	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				83	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				84	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				85	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				86	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				87	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				88	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
				89	1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.96]
				90
				91	def tDist(n):
				92	if (n >= len(tDistribution)):
				93	return tDistribution[-1]
				94	return tDistribution[n]
				95
				96	results = {}
				97
				98	for benchmark in benchmarks:
				99	results[benchmark] = {}
				100
				101	bmodes = modes["*"]
				102	if benchmark in modes:
				103	bmodes = modes[benchmark]
				104
				105	for mode in bmodes:
				106	results[benchmark][mode] = []
				107
				108	for runno in range(runcount):
				109	# Now run it and get the results
				110	print(benchmark + " " + mode + " " + str(runno))
				111	res = os.popen(js + " " + benchmark + "/" + mode + ".js").read()
				112	time = float(re.match("Time: ([0-9]*)ms", res).group(1))
				113
				114	if runno >= keepfrom:
				115	results[benchmark][mode].append(time)
				116
				117	# Collect the totals
				118	sresults = {}
				119	totals = {
				120	"mean": 1,
				121	"stddev": 1,
				122	"sem": 1,
				123	"ci": 1,
				124	"runs": 0
				125	}
				126
				127	for benchmark in benchmarks:
				128	sresults[benchmark] = {}
				129
				130	bmodes = modes["*"]
				131	if benchmark in modes:
				132	bmodes = modes[benchmark]
				133
				134	for mode in bmodes:
				135	sresults[benchmark][mode] = sresult = {}
				136	result = results[benchmark][mode]
				137	totals["runs"] = totals["runs"] + 1
				138
				139	sresult["mode"] = mode
				140
				141	mean = sresult["mean"] = sum(result) / len(result)
				142	stddev = sresult["stddev"] = math.sqrt(
				143	sum(
				144	map(lambda e: math.pow(e - mean, 2), result)
				145	) / (len(result) - 1)
				146	)
				147
				148	sm = sresult["sm"] = stddev / mean
				149	sem = sresult["sem"] = stddev / math.sqrt(len(result))
				150	semm = sresult["semm"] = sem / mean
				151	ci = sresult["ci"] = tDist(len(result)) * sem
				152	cim = sresult["cim"] = ci / mean
				153
				154	totals["mean"] *= mean
				155	totals["stddev"] *= stddev
				156	totals["sem"] *= sem
				157	totals["ci"] *= ci
				158
				159	power = 1 / totals["runs"]
				160	totals["mean"] = math.pow(totals["mean"], power)
				161	totals["stddev"] = math.pow(totals["stddev"], power)
				162	totals["sm"] = totals["stddev"] / totals["mean"]
				163	totals["sem"] = math.pow(totals["sem"], power)
				164	totals["semm"] = totals["sem"] / totals["mean"]
				165	totals["ci"] = math.pow(totals["ci"], power)
				166	totals["cim"] = totals["ci"] / totals["mean"]
				167
				168	totals["sm"] *= 100
				169	totals["semm"] *= 100
				170	totals["cim"] *= 100
				171
# Overall summary built from `totals`, followed by a blank separator line.
_summary_lines = [
    "Final results:",
    u" %(mean)fms \u00b1 %(cim)f%% (lower is better)" % totals,
    " Standard deviation = %(sm)f%% of mean" % totals,
    " Standard error = %(semm)f%% of mean" % totals,
    " %(runs)d runs" % {"runs": runcount},
    "",
]
for _line in _summary_lines:
    print(_line)
keith_miller@apple.com	a8f138f	2016-05-24 19:00:51 +0000	[diff] [blame]	178
# Per-benchmark statistics, one line per mode that was run for the benchmark.
print("Result breakdown:")
_breakdown_row = u" %(mode)s: %(mean)fms \u00b1 %(cim)f%% (stddev=%(sm)f%%, stderr=%(semm)f%%)"
for benchmark in benchmarks:
    print(" %(benchmark)s:" % {"benchmark": benchmark})

    _per_mode = sresults[benchmark]
    # Benchmarks without their own entry in `modes` use the "*" default list.
    for mode in modes.get(benchmark, modes["*"]):
        print(_breakdown_row % _per_mode[mode])
# NOTE(review): emitted once, as the blank line that closes the section --
# confirm this nesting against the upstream file.
print("")
keith_miller@apple.com	a8f138f	2016-05-24 19:00:51 +0000	[diff] [blame]	190
# Dump the kept timings for every benchmark/mode pair.
print("Raw results:")
for benchmark in benchmarks:
    print(" %(benchmark)s:" % {"benchmark": benchmark})

    _timings = results[benchmark]
    # Benchmarks without their own entry in `modes` use the "*" default list.
    for mode in modes.get(benchmark, modes["*"]):
        _row = {"mode": mode, "results": _timings[mode]}
        print(" %(mode)s: %(results)s" % _row)