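"""Stability checking: run tests repeatedly and flag tests whose results are
inconsistent across iterations or whose runtime comes close to the timeout.
"""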
import copy
import functools
import imp
import io
import os
from collections import OrderedDict, defaultdict
from datetime import datetime
from mozlog import reader
from mozlog.formatters import JSONFormatter
from mozlog.handlers import BaseHandler, StreamHandler, LogLevelFilter
here = os.path.dirname(__file__)
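# Loading localpaths has the side effect of putting the tools directories on
# sys.path, which is what makes the `wpt` import below resolvable.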
localpaths = imp.load_source("localpaths", os.path.abspath(os.path.join(here, os.pardir, os.pardir, "localpaths.py")))
from wpt.markdown import markdown_adjust, table
# If a test takes more than (FLAKY_THRESHOLD*timeout) and does not consistently
# time out, it is considered slow (potentially flaky).
FLAKY_THRESHOLD = 0.8
class LogActionFilter(BaseHandler):
"""Handler that filters out messages not of a given set of actions.
Subclasses BaseHandler.
:param inner: Handler to use for messages that pass this filter
:param actions: List of actions for which to fire the handler
"""
def __init__(self, inner, actions):
"""Extend BaseHandler and set inner and actions props on self."""
BaseHandler.__init__(self, inner)
self.inner = inner
self.actions = actions
def __call__(self, item):
"""Invoke handler if action is in list passed as constructor param."""
if item["action"] in self.actions:
return self.inner(item)
class LogHandler(reader.LogHandler):
"""Handle updating test and subtest status in log.
Subclasses reader.LogHandler.
"""
def __init__(self):
self.results = OrderedDict()
def find_or_create_test(self, data):
test_name = data["test"]
if self.results.get(test_name):
return self.results[test_name]
test = {
"subtests": OrderedDict(),
"status": defaultdict(int),
"longest_duration": defaultdict(float),
}
self.results[test_name] = test
return test
def find_or_create_subtest(self, data):
test = self.find_or_create_test(data)
subtest_name = data["subtest"]
if test["subtests"].get(subtest_name):
return test["subtests"][subtest_name]
subtest = {
"status": defaultdict(int),
"messages": set()
}
test["subtests"][subtest_name] = subtest
return subtest
def test_start(self, data):
test = self.find_or_create_test(data)
test["start_time"] = data["time"]
def test_status(self, data):
subtest = self.find_or_create_subtest(data)
subtest["status"][data["status"]] += 1
if data.get("message"):
subtest["messages"].add(data["message"])
def test_end(self, data):
test = self.find_or_create_test(data)
test["status"][data["status"]] += 1
# Timestamps are in ms since epoch.
duration = data["time"] - test.pop("start_time")
test["longest_duration"][data["status"]] = max(
duration, test["longest_duration"][data["status"]])
try:
# test_timeout is in seconds; convert it to ms.
test["timeout"] = data["extra"]["test_timeout"] * 1000
except KeyError:
# If a test is skipped, it won't have extra info.
pass
def is_inconsistent(results_dict, iterations):
"""Return whether or not a single test is inconsistent."""
if 'SKIP' in results_dict:
return False
return len(results_dict) > 1 or sum(results_dict.values()) != iterations
def find_slow_status(test):
"""Check if a single test almost times out.
We are interested in tests that almost time out (i.e. likely to be flaky).
Therefore, timeout statuses are ignored, including (EXTERNAL-)TIMEOUT.
    CRASH and ERROR are also ignored because they override TIMEOUT; a test
    that both crashes and times out is marked as CRASH, so it won't be flaky.
Returns:
A result status produced by a run that almost times out; None, if no
runs almost time out.
"""
if "timeout" not in test:
return None
threshold = test["timeout"] * FLAKY_THRESHOLD
for status in ['PASS', 'FAIL', 'OK']:
if (status in test["longest_duration"] and
test["longest_duration"][status] > threshold):
return status
return None
def process_results(log, iterations):
"""Process test log and return overall results and list of inconsistent tests."""
inconsistent = []
slow = []
handler = LogHandler()
reader.handle_log(reader.read(log), handler)
results = handler.results
for test_name, test in results.iteritems():
if is_inconsistent(test["status"], iterations):
inconsistent.append((test_name, None, test["status"], []))
for subtest_name, subtest in test["subtests"].iteritems():
if is_inconsistent(subtest["status"], iterations):
inconsistent.append((test_name, subtest_name, subtest["status"], subtest["messages"]))
slow_status = find_slow_status(test)
if slow_status is not None:
slow.append((
test_name,
slow_status,
test["longest_duration"][slow_status],
test["timeout"]
))
return results, inconsistent, slow
def err_string(results_dict, iterations):
"""Create and return string with errors from test run."""
rv = []
total_results = sum(results_dict.values())
for key, value in sorted(results_dict.items()):
rv.append("%s%s" %
(key, ": %s/%s" % (value, iterations) if value != iterations else ""))
if total_results < iterations:
rv.append("MISSING: %s/%s" % (iterations - total_results, iterations))
rv = ", ".join(rv)
if is_inconsistent(results_dict, iterations):
rv = "**%s**" % rv
return rv
def write_inconsistent(log, inconsistent, iterations):
"""Output inconsistent tests to logger.error."""
log("## Unstable results ##\n")
strings = [(
"`%s`" % markdown_adjust(test),
("`%s`" % markdown_adjust(subtest)) if subtest else "",
err_string(results, iterations),
("`%s`" % markdown_adjust(";".join(messages))) if len(messages) else "")
for test, subtest, results, messages in inconsistent]
table(["Test", "Subtest", "Results", "Messages"], strings, log)
def write_slow_tests(log, slow):
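    """Output slow tests as a Markdown table via the passed log function."""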
log("## Slow tests ##\n")
strings = [(
"`%s`" % markdown_adjust(test),
"`%s`" % status,
"`%.0f`" % duration,
"`%.0f`" % timeout)
for test, status, duration, timeout in slow]
table(["Test", "Result", "Longest duration (ms)", "Timeout (ms)"], strings, log)
def write_results(log, results, iterations, pr_number=None, use_details=False):
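    """Output a results table for every test via the passed log function.

    If use_details is True, the output is wrapped in <details> blocks so that
    it renders collapsed where HTML is supported (e.g. in a PR comment).
    """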
log("## All results ##\n")
if use_details:
log("<details>\n")
log("<summary>%i %s ran</summary>\n\n" % (len(results),
"tests" if len(results) > 1
else "test"))
for test_name, test in results.iteritems():
baseurl = "http://w3c-test.org/submissions"
if "https" in os.path.splitext(test_name)[0].split(".")[1:]:
baseurl = "https://w3c-test.org/submissions"
title = test_name
if use_details:
log("<details>\n")
if pr_number:
title = "<a href=\"%s/%s%s\">%s</a>" % (baseurl, pr_number, test_name, title)
log('<summary>%s</summary>\n\n' % title)
else:
log("### %s ###" % title)
strings = [("", err_string(test["status"], iterations), "")]
strings.extend(((
("`%s`" % markdown_adjust(subtest_name)) if subtest else "",
err_string(subtest["status"], iterations),
("`%s`" % markdown_adjust(';'.join(subtest["messages"]))) if len(subtest["messages"]) else "")
for subtest_name, subtest in test["subtests"].items()))
table(["Subtest", "Results", "Messages"], strings, log)
if use_details:
log("</details>\n")
if use_details:
log("</details>\n")
def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs):
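    """Run the tests for one verification step and process the results.

    Returns a tuple of (results, inconsistent, slow, iterations).
    """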
import wptrunner
kwargs = copy.deepcopy(kwargs)
if restart_after_iteration:
kwargs["repeat"] = iterations
else:
kwargs["rerun"] = iterations
kwargs["pause_after_test"] = False
kwargs.update(kwargs_extras)
def wrap_handler(x):
if not kwargs["verify_log_full"]:
x = LogLevelFilter(x, "WARNING")
x = LogActionFilter(x, ["log", "process_output"])
return x
initial_handlers = logger._state.handlers
logger._state.handlers = [wrap_handler(handler)
for handler in initial_handlers]
log = io.BytesIO()
    # Capture the full structured log in memory for processing after the run.
    # (Unless verify_log_full is set, the pre-existing handlers wrapped above
    # only let process output and warning+ level log messages through.)
logger.add_handler(StreamHandler(log, JSONFormatter()))
wptrunner.run_tests(**kwargs)
logger._state.handlers = initial_handlers
logger._state.running_tests = set()
logger._state.suite_started = False
log.seek(0)
results, inconsistent, slow = process_results(log, iterations)
return results, inconsistent, slow, iterations
def get_steps(logger, repeat_loop, repeat_restart, kwargs_extras):
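    """Build the list of verification steps as (description, callable) pairs.

    For example, with repeat_loop=10, repeat_restart=5 and
    kwargs_extras=[{}, {"chaos_mode_flags": 3}], four steps are produced: a
    looping run and a restarting run for each set of extra kwargs.
    """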
steps = []
for kwargs_extra in kwargs_extras:
if kwargs_extra:
flags_string = " with flags %s" % " ".join(
"%s=%s" % item for item in kwargs_extra.iteritems())
else:
flags_string = ""
if repeat_loop:
desc = "Running tests in a loop %d times%s" % (repeat_loop,
flags_string)
steps.append((desc, functools.partial(run_step, logger, repeat_loop, False, kwargs_extra)))
if repeat_restart:
desc = "Running tests in a loop with restarts %s times%s" % (repeat_restart,
flags_string)
steps.append((desc, functools.partial(run_step, logger, repeat_restart, True, kwargs_extra)))
return steps
def write_summary(logger, step_results, final_result):
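    """Log a summary of each step's result followed by the final result."""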
for desc, result in step_results:
logger.info('::: %s : %s' % (desc, result))
logger.info(':::')
if final_result == "PASS":
log = logger.info
elif final_result == "TIMEOUT":
log = logger.warning
else:
log = logger.error
log('::: Test verification %s' % final_result)
logger.info(':::')
def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None,
output_results=True, **kwargs):
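    """Run all verification steps and report the overall outcome.

    Returns 1 if any step finds inconsistent or slow tests, 2 if max_time is
    exceeded before all steps have run, and None if every step passes.
    """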
kwargs_extras = [{}]
if chaos_mode and kwargs["product"] == "firefox":
kwargs_extras.append({"chaos_mode_flags": 3})
steps = get_steps(logger, repeat_loop, repeat_restart, kwargs_extras)
start_time = datetime.now()
step_results = []
for desc, step_func in steps:
if max_time and datetime.now() - start_time > max_time:
logger.info("::: Test verification is taking too long: Giving up!")
logger.info("::: So far, all checks passed, but not all checks were run.")
write_summary(logger, step_results, "TIMEOUT")
return 2
logger.info(':::')
logger.info('::: Running test verification step "%s"...' % desc)
logger.info(':::')
results, inconsistent, slow, iterations = step_func(**kwargs)
if output_results:
write_results(logger.info, results, iterations)
if inconsistent:
step_results.append((desc, "FAIL"))
write_inconsistent(logger.info, inconsistent, iterations)
write_summary(logger, step_results, "FAIL")
return 1
if slow:
step_results.append((desc, "FAIL"))
write_slow_tests(logger.info, slow)
write_summary(logger, step_results, "FAIL")
return 1
step_results.append((desc, "PASS"))
write_summary(logger, step_results, "PASS")
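# Illustrative usage (hypothetical values; in practice the calling tool
# supplies the logger and the wptrunner kwargs):
#
#     ret = check_stability(logger, repeat_loop=10, repeat_restart=5,
#                           max_time=timedelta(hours=1), product="firefox",
#                           **other_wptrunner_kwargs)
#     # ret is 1 for unstable or slow tests, 2 if max_time was exceeded,
#     # and None when every step passed.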