blob: 7f43b53c5d1bcc50e194584fd0c031c6aab45787 [file] [log] [blame]
#!/usr/bin/env python -u
# Copyright (C) 2019 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. Neither the name of Apple Inc. ("Apple") nor the names of
# its contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import argparse
import json
# scipy and numpy are third-party dependencies. Only a genuine ImportError is
# turned into the friendly "not installed" hint; any other failure raised
# while importing (or a Ctrl-C) propagates with its own traceback instead of
# being misreported as a missing package.
try:
    from scipy import stats
except ImportError:
    print("ERROR: scipy package is not installed. Run `pip install scipy`")
    sys.exit(1)

try:
    import numpy
except ImportError:
    print("ERROR: numpy package is not installed. Run `pip install numpy`")
    sys.exit(1)
def readJSONFile(path):
    """Open the file at *path* for reading and return its parsed JSON content."""
    with open(path, 'r') as jsonFile:
        return json.load(jsonFile)
# Identifiers for the supported benchmark types. detectBenchmark() returns one
# of these, and biggerIsBetter() / main() compare against them. They are not
# necessarily the same strings as the top-level keys looked up in the JSON
# payloads (e.g. a Speedometer2 payload is detected via the "Speedometer-2"
# key and a JetStream2 payload via "JetStream2.0").
Speedometer2 = "Speedometer2"
JetStream2 = "JetStream2"
PLT5 = "PLT5"
MotionMark = "MotionMark"
MotionMark1_1 = "MotionMark-1.1"
def detectJetStream2(payload):
    """Return True when *payload* contains a top-level "JetStream2.0" entry."""
    topLevelKey = "JetStream2.0"
    return topLevelKey in payload
def JetStream2Results(payload):
    """Return one geometric-mean score per iteration of a JetStream2 payload.

    The number of iterations is taken from the length of the "gaussian-blur"
    test's Score list. For every iteration index, the Score of each test at
    that index is collected and combined with scipy.stats.gmean.
    """
    assert detectJetStream2(payload)
    tests = payload["JetStream2.0"]["tests"]
    iterationCount = len(tests["gaussian-blur"]["metrics"]["Score"]["current"])
    return [
        stats.gmean([
            tests[name]["metrics"]["Score"]["current"][iteration]
            for name in tests.keys()
        ])
        for iteration in range(iterationCount)
    ]
def detectSpeedometer2(payload):
    """Return True when *payload* contains a top-level "Speedometer-2" entry."""
    topLevelKey = "Speedometer-2"
    return topLevelKey in payload
def Speedometer2Results(payload):
    """Return the numpy mean of each entry in the Speedometer-2 Score list.

    One value is produced per element of
    payload["Speedometer-2"]["metrics"]["Score"]["current"].
    """
    assert detectSpeedometer2(payload)
    scoreEntries = payload["Speedometer-2"]["metrics"]["Score"]["current"]
    return [numpy.mean(entry) for entry in scoreEntries]
def detectPLT5(payload):
    """Return True when *payload* has the shape of a PLT5 result.

    That means: an "iterations" entry that is a non-empty list whose first
    element contains the keys "cold", "warm" and "Geometric". Only the first
    element is inspected.
    """
    if "iterations" not in payload:
        return False
    runs = payload["iterations"]
    if not isinstance(runs, list) or len(runs) == 0:
        return False
    firstRun = runs[0]
    return all(key in firstRun for key in ("cold", "warm", "Geometric"))
def PLT5Results(payload):
    """Return the "Geometric" value of every entry in payload["iterations"]."""
    assert detectPLT5(payload)
    return [iteration["Geometric"] for iteration in payload["iterations"]]
def detectMotionMark(payload):
    """Return True when *payload* contains a top-level "MotionMark" entry."""
    topLevelKey = "MotionMark"
    return topLevelKey in payload
def detectMotionMark1_1(payload):
    """Return True when *payload* contains a top-level "MotionMark-1.1" entry."""
    topLevelKey = "MotionMark-1.1"
    return topLevelKey in payload
def motionMarkResults(payload):
    """Return one geometric-mean score per iteration of a MotionMark payload.

    Accepts a payload keyed by either "MotionMark" or "MotionMark-1.1"
    ("MotionMark" takes precedence when both are present). The number of
    iterations is taken from the Score list of the first test; for every
    iteration index, the Score of each test at that index is collected and
    combined with scipy.stats.gmean.
    """
    assert detectMotionMark(payload) or detectMotionMark1_1(payload)
    if detectMotionMark(payload):
        payload = payload["MotionMark"]
    else:
        payload = payload["MotionMark-1.1"]

    tests = payload["tests"]
    # list(...) rather than .keys(): on Python 3, keys() returns a view that
    # cannot be indexed, so testNames[0] would raise TypeError. On Python 2
    # this yields the same list as .keys().
    testNames = list(tests)
    iterationCount = len(tests[testNames[0]]["metrics"]["Score"]["current"])
    return [
        stats.gmean([
            tests[name]["metrics"]["Score"]["current"][iteration]
            for name in testNames
        ])
        for iteration in range(iterationCount)
    ]
def motionMark1_1Results(payload):
    """Return per-iteration scores for a MotionMark-1.1 payload.

    Delegates to motionMarkResults, which handles both payload keys.
    """
    scores = motionMarkResults(payload)
    return scores
def detectBenchmark(payload):
    """Return the benchmark-type constant matching *payload*, or None.

    Detectors are tried in a fixed order and the first one that matches wins.
    """
    orderedDetectors = (
        (detectJetStream2, JetStream2),
        (detectSpeedometer2, Speedometer2),
        (detectPLT5, PLT5),
        (detectMotionMark, MotionMark),
        (detectMotionMark1_1, MotionMark1_1),
    )
    for matches, benchmarkType in orderedDetectors:
        if matches(payload):
            return benchmarkType
    return None
def biggerIsBetter(benchmarkType):
    """Return True if a larger score is better for *benchmarkType*.

    JetStream2, Speedometer2, MotionMark and MotionMark1_1 return True;
    PLT5 returns False.

    Raises:
        AssertionError: for any other value. This is raised explicitly rather
            than via `assert False`, because assert statements are removed
            when Python runs with -O, which would make this function fall
            through and return None (read by callers as "smaller is better").
    """
    if benchmarkType in (JetStream2, Speedometer2, MotionMark, MotionMark1_1):
        return True
    if benchmarkType == PLT5:
        return False
    print("Should not be reached.")
    raise AssertionError("Unknown benchmark type: {!r}".format(benchmarkType))
def ttest(benchmarkType, a, b):
    """Print a statistical comparison of two lists of benchmark scores.

    Prints the mean of *a* and *b*, the p-value of a t-test between them,
    how many times better or worse b's mean is relative to a's (direction
    chosen via biggerIsBetter(benchmarkType)), and whether the p-value is
    at or below 0.05.

    Args:
        benchmarkType: one of the benchmark-type constants understood by
            biggerIsBetter.
        a: scores for the "a" side of the comparison.
        b: scores for the "b" side of the comparison.
    """
    # Two-tailed Welch's t-test: equal_var=False does not assume the two
    # samples share a variance. Only the p-value is used.
    _tStatistic, pValue = stats.ttest_ind(a, b, equal_var=False)
    aMean = numpy.mean(a)
    bMean = numpy.mean(b)
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("a mean = {:.5f}".format(aMean))
    print("b mean = {:.5f}".format(bMean))
    print("pValue = {:.10f}".format(pValue))

    higherIsBetter = biggerIsBetter(benchmarkType)
    if higherIsBetter:
        print("(Bigger means are better.)")
    else:
        print("(Smaller means are better.)")

    # The ratio is always larger mean / smaller mean; the wording describes
    # b relative to a.
    if aMean > bMean:
        ratio = aMean / bMean
        verdict = "worse" if higherIsBetter else "better"
    else:
        ratio = bMean / aMean
        verdict = "better" if higherIsBetter else "worse"
    print("{:.3f} times {}".format(ratio, verdict))

    if pValue <= 0.05:
        print("Results ARE significant")
    else:
        print("Results ARE NOT significant")
def getOptions():
    """Parse the command line and return the namespace holding `a` and `b`.

    Both -a and -b are required string paths. Arguments the parser does not
    recognise are ignored, because parse_known_args is used and only the
    parsed namespace is returned.
    """
    parser = argparse.ArgumentParser(description="Compare two WebKit benchmark results. Pass in two JSON result files to compare them. This script prints the pValue along with the magnitude of the change.")
    requiredPaths = (
        ("-a", "a of a/b. Path to JSON results file."),
        ("-b", "b of a/b. Path to JSON results file."),
    )
    for flag, helpText in requiredPaths:
        parser.add_argument(flag, type=str, required=True, help=helpText)
    knownArgs, _unrecognised = parser.parse_known_args()
    return knownArgs
def main():
    """Load the -a and -b result files, check they match, and compare them.

    Exits with status 1 (after printing a message) when the two files are
    detected as different benchmarks or when the benchmark type is unknown.
    Otherwise extracts the per-iteration scores from both files and passes
    them to ttest.
    """
    args = getOptions()
    a = readJSONFile(args.a)
    b = readJSONFile(args.b)

    typeA = detectBenchmark(a)
    typeB = detectBenchmark(b)

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    if typeA != typeB:
        print("-a and -b are not the same benchmark. a={} b={}".format(typeA, typeB))
        sys.exit(1)

    if not (typeA and typeB):
        print("Unknown benchmark type. a={} b={}".format(typeA, typeB))
        sys.exit(1)

    # Maps each benchmark type to the function that extracts its scores.
    extractors = {
        JetStream2: JetStream2Results,
        Speedometer2: Speedometer2Results,
        MotionMark: motionMarkResults,
        MotionMark1_1: motionMark1_1Results,
        PLT5: PLT5Results,
    }
    extractResults = extractors.get(typeA)
    if extractResults is None:
        print("Unknown benchmark type")
        sys.exit(1)

    ttest(typeA, extractResults(a), extractResults(b))
if __name__ == "__main__":
main()