import copy
import functools
import imp
import io
import os
from collections import OrderedDict, defaultdict
from datetime import datetime

from mozlog import reader
from mozlog.formatters import JSONFormatter
from mozlog.handlers import BaseHandler, StreamHandler, LogLevelFilter

here = os.path.dirname(__file__)
localpaths = imp.load_source("localpaths", os.path.abspath(os.path.join(here, os.pardir, os.pardir, "localpaths.py")))
from wpt.markdown import markdown_adjust, table


# If a test takes more than (FLAKY_THRESHOLD*timeout) and does not consistently
# time out, it is considered slow (potentially flaky).
FLAKY_THRESHOLD = 0.8


class LogActionFilter(BaseHandler):

    """Handler that filters out log messages whose action is not in a given set.

    Subclasses BaseHandler.

    :param inner: Handler to use for messages that pass this filter
    :param actions: List of actions for which to fire the handler
    """

    def __init__(self, inner, actions):
        """Extend BaseHandler and set inner and actions attributes on self."""
        BaseHandler.__init__(self, inner)
        self.inner = inner
        self.actions = actions

    def __call__(self, item):
        """Invoke the inner handler if the item's action is in the configured list."""
        if item["action"] in self.actions:
            return self.inner(item)


class LogHandler(reader.LogHandler):

    """Handle updating test and subtest status in log.

    Subclasses reader.LogHandler.
    """
    def __init__(self):
        self.results = OrderedDict()

    def find_or_create_test(self, data):
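        """Return the result record for data["test"], creating it if it does not exist."""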
        test_name = data["test"]
        if self.results.get(test_name):
            return self.results[test_name]

        test = {
            "subtests": OrderedDict(),
            "status": defaultdict(int),
            "longest_duration": defaultdict(float),
        }
        self.results[test_name] = test
        return test

    def find_or_create_subtest(self, data):
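        """Return the record for data["subtest"] within its test, creating it if needed."""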
        test = self.find_or_create_test(data)
        subtest_name = data["subtest"]

        if test["subtests"].get(subtest_name):
            return test["subtests"][subtest_name]

        subtest = {
            "status": defaultdict(int),
            "messages": set()
        }
        test["subtests"][subtest_name] = subtest

        return subtest

    def test_start(self, data):
        test = self.find_or_create_test(data)
        test["start_time"] = data["time"]

    def test_status(self, data):
        subtest = self.find_or_create_subtest(data)
        subtest["status"][data["status"]] += 1
        if data.get("message"):
            subtest["messages"].add(data["message"])

    def test_end(self, data):
        test = self.find_or_create_test(data)
        test["status"][data["status"]] += 1
        # Timestamps are in ms since epoch.
        duration = data["time"] - test.pop("start_time")
        test["longest_duration"][data["status"]] = max(
            duration, test["longest_duration"][data["status"]])
        try:
            # test_timeout is in seconds; convert it to ms.
            test["timeout"] = data["extra"]["test_timeout"] * 1000
        except KeyError:
            # If a test is skipped, it won't have extra info.
            pass


def is_inconsistent(results_dict, iterations):
    """Return whether or not a single test is inconsistent."""
    if 'SKIP' in results_dict:
        return False
    return len(results_dict) > 1 or sum(results_dict.values()) != iterations


def find_slow_status(test):
    """Check if a single test almost times out.

    We are interested in tests that almost time out (i.e. likely to be flaky).
    Therefore, timeout statuses are ignored, including (EXTERNAL-)TIMEOUT.
    CRASH & ERROR are also ignored because they override TIMEOUT; a test
    that both crashes and times out is marked as CRASH, so it won't be flaky.

    Returns:
        A result status produced by a run that almost times out; None, if no
        runs almost time out.
    """
| if "timeout" not in test: |
| return None |
| threshold = test["timeout"] * FLAKY_THRESHOLD |
| for status in ['PASS', 'FAIL', 'OK']: |
| if (status in test["longest_duration"] and |
| test["longest_duration"][status] > threshold): |
| return status |
| return None |
| |
| |
| def process_results(log, iterations): |
| """Process test log and return overall results and list of inconsistent tests.""" |
    inconsistent = []
    slow = []
    handler = LogHandler()
    reader.handle_log(reader.read(log), handler)
    results = handler.results
    for test_name, test in results.iteritems():
        if is_inconsistent(test["status"], iterations):
            inconsistent.append((test_name, None, test["status"], []))
        for subtest_name, subtest in test["subtests"].iteritems():
            if is_inconsistent(subtest["status"], iterations):
                inconsistent.append((test_name, subtest_name, subtest["status"], subtest["messages"]))

        slow_status = find_slow_status(test)
        if slow_status is not None:
            slow.append((
                test_name,
                slow_status,
                test["longest_duration"][slow_status],
                test["timeout"]
            ))

    return results, inconsistent, slow


def err_string(results_dict, iterations):
    """Create and return a string summarizing the results of a test run."""
    rv = []
    total_results = sum(results_dict.values())
    for key, value in sorted(results_dict.items()):
        rv.append("%s%s" %
                  (key, ": %s/%s" % (value, iterations) if value != iterations else ""))
    if total_results < iterations:
        rv.append("MISSING: %s/%s" % (iterations - total_results, iterations))
    rv = ", ".join(rv)
    if is_inconsistent(results_dict, iterations):
        rv = "**%s**" % rv
    return rv


def write_inconsistent(log, inconsistent, iterations):
    """Output inconsistent tests as a Markdown table via the given log function."""
    log("## Unstable results ##\n")
    strings = [(
        "`%s`" % markdown_adjust(test),
        ("`%s`" % markdown_adjust(subtest)) if subtest else "",
        err_string(results, iterations),
        ("`%s`" % markdown_adjust(";".join(messages))) if len(messages) else "")
        for test, subtest, results, messages in inconsistent]
    table(["Test", "Subtest", "Results", "Messages"], strings, log)


def write_slow_tests(log, slow):
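    """Output slow tests as a Markdown table via the given log function."""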
| log("## Slow tests ##\n") |
| strings = [( |
| "`%s`" % markdown_adjust(test), |
| "`%s`" % status, |
| "`%.0f`" % duration, |
| "`%.0f`" % timeout) |
| for test, status, duration, timeout in slow] |
| table(["Test", "Result", "Longest duration (ms)", "Timeout (ms)"], strings, log) |
| |
| |
| def write_results(log, results, iterations, pr_number=None, use_details=False): |
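    """Output all test results via the given log function.

    When use_details is set, each test is wrapped in an HTML <details> block,
    and if pr_number is also given the test name links to the corresponding
    w3c-test.org submission.
    """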
| log("## All results ##\n") |
| if use_details: |
| log("<details>\n") |
| log("<summary>%i %s ran</summary>\n\n" % (len(results), |
| "tests" if len(results) > 1 |
| else "test")) |
| |
| for test_name, test in results.iteritems(): |
| baseurl = "http://w3c-test.org/submissions" |
| if "https" in os.path.splitext(test_name)[0].split(".")[1:]: |
| baseurl = "https://w3c-test.org/submissions" |
| title = test_name |
| if use_details: |
| log("<details>\n") |
| if pr_number: |
| title = "<a href=\"%s/%s%s\">%s</a>" % (baseurl, pr_number, test_name, title) |
| log('<summary>%s</summary>\n\n' % title) |
| else: |
| log("### %s ###" % title) |
| strings = [("", err_string(test["status"], iterations), "")] |
| |
| strings.extend((( |
| ("`%s`" % markdown_adjust(subtest_name)) if subtest else "", |
| err_string(subtest["status"], iterations), |
| ("`%s`" % markdown_adjust(';'.join(subtest["messages"]))) if len(subtest["messages"]) else "") |
| for subtest_name, subtest in test["subtests"].items())) |
| table(["Subtest", "Results", "Messages"], strings, log) |
| if use_details: |
| log("</details>\n") |
| |
| if use_details: |
| log("</details>\n") |
| |
| |
| def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwargs): |
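    """Run one verification step and return its processed results.

    The tests are run `iterations` times, using wptrunner's "repeat" option
    when restart_after_iteration is set and its "rerun" option otherwise.
    The wptrunner log is captured in memory and summarized via
    process_results.
    """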
    import wptrunner
    kwargs = copy.deepcopy(kwargs)

    if restart_after_iteration:
        kwargs["repeat"] = iterations
    else:
        kwargs["rerun"] = iterations

    kwargs["pause_after_test"] = False
    kwargs.update(kwargs_extras)

    def wrap_handler(x):
        if not kwargs["verify_log_full"]:
            x = LogLevelFilter(x, "WARNING")
            x = LogActionFilter(x, ["log", "process_output"])
        return x

    initial_handlers = logger._state.handlers
    logger._state.handlers = [wrap_handler(handler)
                              for handler in initial_handlers]

    log = io.BytesIO()
    # Set up logging for wptrunner that keeps only process output and
    # warning+ level logs
    logger.add_handler(StreamHandler(log, JSONFormatter()))

    wptrunner.run_tests(**kwargs)

    logger._state.handlers = initial_handlers
    logger._state.running_tests = set()
    logger._state.suite_started = False

    log.seek(0)
    results, inconsistent, slow = process_results(log, iterations)
    return results, inconsistent, slow, iterations


def get_steps(logger, repeat_loop, repeat_restart, kwargs_extras):
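    """Build the list of (description, callable) verification steps.

    For each entry in kwargs_extras, a repeat-loop step and a
    repeat-with-restarts step are added when the corresponding repeat count
    is non-zero.
    """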
    steps = []
    for kwargs_extra in kwargs_extras:
        if kwargs_extra:
            flags_string = " with flags %s" % " ".join(
                "%s=%s" % item for item in kwargs_extra.iteritems())
        else:
            flags_string = ""

        if repeat_loop:
            desc = "Running tests in a loop %d times%s" % (repeat_loop,
                                                           flags_string)
            steps.append((desc, functools.partial(run_step, logger, repeat_loop, False, kwargs_extra)))

        if repeat_restart:
            desc = "Running tests in a loop with restarts %d times%s" % (repeat_restart,
                                                                         flags_string)
            steps.append((desc, functools.partial(run_step, logger, repeat_restart, True, kwargs_extra)))

    return steps


def write_summary(logger, step_results, final_result):
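    """Log a summary of the step results and the overall verdict."""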
    for desc, result in step_results:
        logger.info('::: %s : %s' % (desc, result))
    logger.info(':::')
    if final_result == "PASS":
        log = logger.info
    elif final_result == "TIMEOUT":
        log = logger.warning
    else:
        log = logger.error
    log('::: Test verification %s' % final_result)

    logger.info(':::')


def check_stability(logger, repeat_loop=10, repeat_restart=5, chaos_mode=True, max_time=None,
                    output_results=True, **kwargs):
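    """Run all verification steps and report the outcome.

    Returns 1 if any step produced inconsistent or slow tests, 2 if max_time
    was exceeded before all steps completed, and None if every step passed.
    """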
    kwargs_extras = [{}]
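    # On Firefox, also run every step with chaos mode enabled, which perturbs
    # scheduling and timing to make intermittent failures easier to reproduce.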
    if chaos_mode and kwargs["product"] == "firefox":
        kwargs_extras.append({"chaos_mode_flags": 3})

    steps = get_steps(logger, repeat_loop, repeat_restart, kwargs_extras)

    start_time = datetime.now()
    step_results = []

    for desc, step_func in steps:
        if max_time and datetime.now() - start_time > max_time:
            logger.info("::: Test verification is taking too long: Giving up!")
            logger.info("::: So far, all checks passed, but not all checks were run.")
            write_summary(logger, step_results, "TIMEOUT")
            return 2

        logger.info(':::')
        logger.info('::: Running test verification step "%s"...' % desc)
        logger.info(':::')
        results, inconsistent, slow, iterations = step_func(**kwargs)
        if output_results:
            write_results(logger.info, results, iterations)

        if inconsistent:
            step_results.append((desc, "FAIL"))
            write_inconsistent(logger.info, inconsistent, iterations)
            write_summary(logger, step_results, "FAIL")
            return 1

        if slow:
            step_results.append((desc, "FAIL"))
            write_slow_tests(logger.info, slow)
            write_summary(logger, step_results, "FAIL")
            return 1

        step_results.append((desc, "PASS"))

    write_summary(logger, step_results, "PASS")