# Copyright (C) 2021 Igalia S.L.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Implements the main loop for executing the tests and handling
# missing/multiple results (because of infrastructure issues). It also
# takes care of marking failing tests as flaky when
# --treat-failing-as-flaky is in effect. It's a class so that we can
# easily instantiate a subclass for testing.
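# Subclasses provide the actual execution hooks: prepareArtifacts,
# prepareExecution, executeTests, updateStatusMap and refreshExecution
# (see MockTestsExecutor below for the minimal set used by the self tests).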
class BaseTestsExecutor
def initialize(runlist, serialPlans, remoteHosts, iterationLimits,
treatFailingAsFlaky, logStream=$stderr)
@runlist = runlist
@serialPlans = serialPlans
@remoteHosts = remoteHosts
@treatFailingAsFlaky = treatFailingAsFlaky
@infraIterationsFloor = iterationLimits.infraIterationsFloor
@iterationsCeiling = iterationLimits.iterationsCeiling
if @iterationsCeiling < @infraIterationsFloor
raise "iterationsCeiling (#{@iterationsCeiling} < @infraIterationsFloor (#{@infraIterationsFloor}"
end
@maxIterations = setMaxIterations(initialMaxAllowedIterations(runlist))
@logStream = logStream
end
def log(s)
@logStream.puts(s) unless @logStream.nil?
end
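# Sets @maxIterations to value, capped at @iterationsCeiling.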
def setMaxIterations(value)
@maxIterations = [value, @iterationsCeiling].min
end
def iterationsForInfraFailures
# If we're running remotely, scale the extra iterations allowed for
# infrastructure failures with the number of remote hosts (one extra
# iteration per two hosts), never going below the configured floor.
numRemoteHosts = @remoteHosts.nil? ? 0 : @remoteHosts.size / 2
[numRemoteHosts, @infraIterationsFloor].max
end
def initialMaxAllowedIterations(runlist)
maxTries = 0
runlist.each { |plan|
next if plan.retryParameters.nil?
if plan.retryParameters.maxTries > maxTries
maxTries = plan.retryParameters.maxTries
end
}
# The number of allowed iterations is the sum of
# iterationsForInfraFailures and the maximum number of times we may
# rerun a test that appears to be flaky.
iterationsForInfraFailures + maxTries
end
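# When --treat-failing-as-flaky is active, gives failing tests retry
# parameters so they are treated as flaky and rerun, and removes them
# from the set of completed tests. Returns the (possibly reduced)
# completed set.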
def adjustFlakinessForFailingTests(iteration, statusMap, evaluator)
completedTests = evaluator.completed
if @treatFailingAsFlaky.nil?
return completedTests
end
# If we have a lot of failing tests, it's likely that the tree
# is bad and there's no point in treating the failures as
# potential flaky tests.
if evaluator.testsFailed.size > @treatFailingAsFlaky.maxFailing
return completedTests
end
adjustedTests = Set.new
evaluator.testsFailed.each { |plan|
if plan.retryParameters.nil?
plan.retryParameters = RetryParameters.new(@treatFailingAsFlaky.passPercentage,
@treatFailingAsFlaky.maxTries)
adjustedTests.add(plan)
end
}
if adjustedTests.size > 0
# Raise the maximum number of iterations if we just marked a
# failing test as flaky. Note that infrastructure issues might
# cause test failures to be reported at later iterations, so we
# use (+) instead of (max). This is safe because the number of
# iterations is capped by iterationsCeiling.
newMaxIterations = @maxIterations + @treatFailingAsFlaky.maxTries
if @maxIterations < newMaxIterations
setMaxIterations(newMaxIterations)
end
putd("adjustFlakinessForFailingTests #{newMaxIterations} (->#{@maxIterations}) #{adjustedTests.collect { |p| p.to_s }}")
end
# We just marked those tests as flaky in order to rerun them, so
# take them out of completedTests.
ret = completedTests - adjustedTests
putd("completedTests: #{ret.collect { |p| p.to_s }}")
ret
end
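# Runs iterations until every plan in the runlist has a definitive
# result or @maxIterations is reached, then returns the accumulated
# statusMap.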
def loop
prepareArtifacts(@remoteHosts)
statusMap = {}
# This is a high-level retry loop. If a remote host goes away in
# the middle of a run and either doesn't come back online or comes
# back having removed the remote directory (*), getStatusMap will be
# missing results for these tests, allowing us to retry them in
# the next iteration.
#
# (*) This may well happen when running the tests on embedded
# boards that regularly corrupt their SD card. Definitely an issue
# for the ci20 boards used to test MIPS.
100000.times { |iteration|
if iteration >= @maxIterations
break
end
# @remoteHosts (and therefore remoteHosts) will be nil
# when not executing on remotes.
remoteHosts = prepareExecution(@remoteHosts)
if remoteHosts and remoteHosts.size == 0
# Either everything is down, or everything got
# rebooted. In the latter case, allow enough time for
# the remote boards to boot up, then retry.
waitInterval = 60
log("All remote hosts failed, retrying after #{waitInterval}s")
sleep(waitInterval)
next
end
executeTests(remoteHosts)
statusMap = updateStatusMap(iteration, statusMap)
evaluator = TestResultEvaluatorSimple.new(@runlist, statusMap)
evaluator.visit!
completedTests = adjustFlakinessForFailingTests(iteration, statusMap, evaluator)
# NB: evaluator.completed may now be out of date; use
# completedTests instead.
break if completedTests.size == @runlist.size
if completedTests.size > @runlist.size
raise "Test count mismatch: #{completedTests.size} > #{@runlist.size}"
end
# Regenerate the lists of tests to run
refreshExecution(@runlist, @serialPlans, completedTests)
numFlaky = @runlist.size - completedTests.size - evaluator.missing.size
testsInfo = "completed #{completedTests.size}/#{@runlist.size} (#{numFlaky} flaky, #{evaluator.missing.size} missing)"
remoteHostsInfo = ""
if remoteHosts
remoteHostsInfo = ", #{remoteHosts.size}/#{@remoteHosts.size} hosts live"
end
log("After try #{iteration + 1}/#{@maxIterations}: #{testsInfo}#{remoteHostsInfo}")
}
statusMap
end
end
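# Self tests for the executor loop above. MockTestsExecutor replays canned
# per-iteration results, and ExecutorSelfTests.run checks that the
# evaluator's final verdict for each test matches the expectations listed
# in each TestCase.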
module ExecutorSelfTests
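# FinalTestResult: the expected final verdict for one test (nil if the test
# is expected to remain without a result). TestResult: the results one test
# produces in one iteration (empty when no result comes in). Iteration: the
# per-test results of one iteration. TestCase: per-test retry parameters
# (nil means not flaky), the mock iterations, the expected final results and
# the optional --treat-failing-as-flaky settings.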
FinalTestResult = Struct.new(:result)
TestResult = Struct.new(:results)
Iteration = Struct.new(:testResults)
TestCase = Struct.new(:testsWithFlaky, :iterations, :expectedTestResults, :treatFailingAsFlaky)
MOCK_TEST_FAMILY = "f"
ITERATION_LIMITS = OpenStruct.new(:infraIterationsFloor => 3,
:iterationsCeiling => 8)
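# Executor subclass used by the self tests: it runs no real tests, it only
# replays the canned per-iteration results it was given.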
class MockTestsExecutor < BaseTestsExecutor
attr_reader :executedIterations
def initialize(runlist, iterationResults, maxIterationsBounds, treatFailingAsFlaky, logStream)
super(runlist, [], nil, maxIterationsBounds, treatFailingAsFlaky, logStream)
@iterationResults = iterationResults
@executedIterations = 0
end
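# The remote-execution hooks below are no-ops for the self tests; results
# are injected in updateStatusMap.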
def prepareArtifacts(remoteHosts)
end
def prepareExecution(remoteHosts)
remoteHosts
end
def executeTests(remoteHosts)
end
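# Converts the canned results for this iteration into status lines and
# feeds them through processStatusLine.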
def updateStatusMap(iteration, statusMap)
if iteration >= @iterationResults.size
$stderr.puts("Trying to execute iteration #{iteration} but only have #{@iterationResults.size} iteration results available")
raise "Self test error: insufficient iterations"
end
@executedIterations += 1
@iterationResults[iteration].testResults.each_with_index { |testResult, testIndex|
index = @runlist[testIndex].index
# Because of infra issues, a test might have multiple results.
testResult.results.each { |result|
line = "./#{STATUS_FILE_PREFIX}#{index} #{$runUniqueId} 0 #{result}"
processStatusLine(statusMap, line)
}
}
putd("statusMap: #{statusMap}")
statusMap
end
def refreshExecution(runlist, serialPlans, completedTests)
end
end
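# Runs every TestCase through a MockTestsExecutor and verifies that the
# evaluator's final verdict for each test matches expectedTestResults.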
def ExecutorSelfTests.run
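# Shorthands: trp/trf = a single PASS/FAIL result in an iteration, tre = no
# result (e.g. an infrastructure failure); ftrp/ftrf = expected final
# PASS/FAIL, ftre = expected to end up without a result.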
trf = TestResult.new([STATUS_FILE_FAIL])
trp = TestResult.new([STATUS_FILE_PASS])
tre = TestResult.new([])
ftrp = FinalTestResult.new(STATUS_FILE_PASS)
ftrf = FinalTestResult.new(STATUS_FILE_FAIL)
ftre = FinalTestResult.new(nil)
[
TestCase.new([nil, nil], # 2 tests (t0, t1), neither flaky
[
Iteration.new(
[
trp, # t0 passed
trp, # t1 passed
])
],
[ftrp, ftrp]),
TestCase.new([nil, nil], # 2 tests (t0, t1), neither flaky
[
Iteration.new([trp, trf]),
],
[ftrp, ftrf]),
# 1 test gives no result, need a second iteration, both finally pass
TestCase.new([nil, nil],
[
Iteration.new([trp, tre]),
Iteration.new([tre, trp]),
],
[ftrp, ftrp]),
# 1 test fails, 1 gives no result, need a second iteration, one finally passes
TestCase.new([nil, nil],
[
Iteration.new([trf, tre]),
Iteration.new([tre, trp]),
],
[ftrf, ftrp]),
# 1 test passes, 1 gives no result, need a second iteration, the other finally fails
TestCase.new([nil, nil],
[
Iteration.new([trp, tre]),
Iteration.new([tre, trf]),
],
[ftrp, ftrf]),
TestCase.new([nil, nil],
[
# First iteration doesn't produce any results.
Iteration.new([tre, tre]),
Iteration.new([trf, tre]),
Iteration.new([tre, trf]),
],
[ftrf, ftrf]),
# Test that, when no flakies are present, we stop at
# infraIterationsFloor when no results come in.
TestCase.new([nil, nil],
[Iteration.new([tre, tre])] * ITERATION_LIMITS.infraIterationsFloor,
[ftre, ftre]),
# Test that the maxIterations adjustment works as expected in
# the presence of a static flaky.
TestCase.new([RetryParameters.new(0.39, 4), nil],
[
# Since we have a flaky test, the maxIterations gets adjusted to
# ITERATION_LIMITS.infraIterationsFloor + RetryParameters.maxTries = 7.
Iteration.new([tre, trp]),
Iteration.new([trp, tre]),
Iteration.new([trf, tre]),
Iteration.new([tre, tre]),
Iteration.new([tre, tre]),
Iteration.new([trp, tre]),
],
[ftrp, ftrp]),
# Test that a flaky can finish early.
TestCase.new([RetryParameters.new(0.4, 4), nil],
[
Iteration.new([tre, trp]),
Iteration.new([trp, tre]),
Iteration.new([trp, tre]),
],
[ftrp, ftrp]),
# Same idea, but the test is marked flaky via --treat-failing-as-flaky
# and the results vary more.
TestCase.new([nil, nil],
[
Iteration.new([trp, tre]),
Iteration.new([tre, trf]),
Iteration.new([tre, trp]),
Iteration.new([tre, trf]),
Iteration.new([tre, trp]),
Iteration.new([tre, trp]),
],
[ftrp, ftrp],
OpenStruct.new(:passPercentage => 0.4,
:maxTries => 6,
:maxFailing => 3)),
# Same, but with results such that the flaky test ends up failing.
TestCase.new([nil, nil],
[
Iteration.new([trp, tre]),
Iteration.new([tre, trf]),
Iteration.new([tre, trp]),
Iteration.new([tre, trf]),
Iteration.new([tre, trp]),
Iteration.new([tre, trf]),
Iteration.new([tre, trf]),
],
[ftrp, ftrf],
OpenStruct.new(:passPercentage => 0.4,
:maxTries => 6,
:maxFailing => 3)),
# Test that a flaky for which not enough results come in is not marked as completed.
TestCase.new([nil],
[
Iteration.new([trf]),
Iteration.new([trp]),
Iteration.new([trf]),
Iteration.new([tre]),
Iteration.new([tre]),
Iteration.new([tre]),
Iteration.new([tre]),
],
[ftre],
OpenStruct.new(:passPercentage => 0.49,
:maxTries => 4,
:maxFailing => 1)),
# Test that (statically) flaky tests aren't adjusted
TestCase.new([RetryParameters.new(0.5, 3), nil],
[
Iteration.new([trf, trp]),
Iteration.new([trp, tre]),
Iteration.new([trf, tre]),
],
[ftrf, ftrp],
OpenStruct.new(:passPercentage => 0.7, # > 2/3
:maxTries => 6,
:maxFailing => 3)),
# Test that we respect maxFailing.
TestCase.new([nil, nil, nil],
[
Iteration.new([trf, trf, trf]),
],
[ftrf, ftrf, ftrf],
OpenStruct.new(:passPercentage => 0.7, # > 2/3
:maxTries => 6,
:maxFailing => 2)),
# Test that we halt at maxIterations (3 + 5 = 8) if no results come in.
TestCase.new([RetryParameters.new(0.5, 5), nil],
[
Iteration.new([tre, trp]),
Iteration.new([tre, tre]),
Iteration.new([tre, tre]),
Iteration.new([tre, tre]),
Iteration.new([tre, tre]),
Iteration.new([tre, tre]),
Iteration.new([tre, tre]),
Iteration.new([tre, tre]),
],
[ftre, ftrp]),
].each_with_index { |testcase, testcaseIndex|
runlist = []
putd("===> Testcase #{testcaseIndex}")
testcase.testsWithFlaky.each_with_index { |retryParameters, index|
plan = BasePlan.mock(MOCK_TEST_FAMILY, "t#{index}", retryParameters)
runlist << plan
}
putd("runlist: #{runlist.collect { |p| p.to_s }}, expected: #{testcase.expectedTestResults}")
executor = MockTestsExecutor.new(runlist, testcase.iterations,
ITERATION_LIMITS, testcase.treatFailingAsFlaky,
$testsDebugStream)
statusMap = executor.loop
if executor.executedIterations != testcase.iterations.size
# The testcase should provide exactly as many
# iteration results as needed.
raise "Executed #{executor.executedIterations}, #{testcase.iterations.size} given"
end
expectCompleted = Set.new(runlist.select { |plan|
not testcase.expectedTestResults[runlist.index(plan)].result.nil?
})
evaluator = TestResultEvaluatorSimple.new(runlist, statusMap)
evaluator.visit!
verifyCompletedTestsMatchExpected(statusMap, evaluator.completed, expectCompleted)
testcaseFailed = false
runlist.each_with_index { |plan, index|
case testcase.expectedTestResults[index].result
when nil
if evaluator.testsPassed.include?(plan)
testcaseFailed = true
$stderr.puts("Test #{index} should not have completed but passed")
elsif evaluator.testsFailed.include?(plan)
testcaseFailed = true
$stderr.puts("Test #{index} should not have completed but failed")
end
when STATUS_FILE_PASS
if not evaluator.testsPassed.include?(plan)
testcaseFailed = true
$stderr.puts("Test #{index} should have passed but didn't")
end
when STATUS_FILE_FAIL
if not evaluator.testsFailed.include?(plan)
testcaseFailed = true
$stderr.puts("Test #{index} should have failed but didn't")
end
else
raise "Can't get here"
end
}
if testcaseFailed
$stderr.puts("statusMap = #{statusMap}")
raise "Self test failure"
end
}
end
end