HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu.

Yongjun Zhang 2015-06-17 15:48:29 -07:00
parent cc432885ad
commit 74351af3b7
2 changed files with 52 additions and 8 deletions

dev-support/determine-flaky-tests-hadoop.py

@@ -62,12 +62,19 @@
 DEFAULT_JENKINS_URL = "https://builds.apache.org"
 DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
 DEFAULT_NUM_PREVIOUS_DAYS = 14
+DEFAULT_TOP_NUM_FAILED_TEST = -1
 
 SECONDS_PER_DAY = 86400
 
 # total number of runs to examine
 numRunsToExamine = 0
 
+#summary mode
+summary_mode = False
+
+#total number of errors
+error_count = 0
+
 """ Parse arguments """
 def parse_args():
   parser = OptionParser()
@@ -80,6 +87,10 @@ def parse_args():
   parser.add_option("-n", "--num-days", type="int",
                     dest="num_prev_days", help="Number of days to examine",
                     default=DEFAULT_NUM_PREVIOUS_DAYS)
+  parser.add_option("-t", "--top", type="int",
+                    dest="num_failed_tests",
+                    help="Summary Mode, only show top number of failed tests",
+                    default=DEFAULT_TOP_NUM_FAILED_TEST)
 
   (options, args) = parser.parse_args()
   if args:
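
A minimal sketch (not part of the patch) of how the new -t/--top option behaves on its own; the flag name, dest, and default mirror the hunk above, while the value 10 is purely illustrative:

  from optparse import OptionParser

  parser = OptionParser()
  parser.add_option("-t", "--top", type="int",
                    dest="num_failed_tests",
                    help="Summary Mode, only show top number of failed tests",
                    default=-1)
  # Simulate running the script with "-t 10" on the command line.
  (opts, args) = parser.parse_args(["-t", "10"])
  print(opts.num_failed_tests)  # 10; the default of -1 leaves summary mode off
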
@@ -100,6 +111,7 @@ def load_url_data(url):
 
 """ List all builds of the target project. """
 def list_builds(jenkins_url, job_name):
+  global summary_mode
   url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
       jenkins=jenkins_url,
       job_name=job_name)
@@ -108,19 +120,25 @@ def list_builds(jenkins_url, job_name):
     data = load_url_data(url)
 
   except:
-    logging.error("Could not fetch: %s" % url)
+    if not summary_mode:
+      logging.error("Could not fetch: %s" % url)
+    error_count += 1
     raise
   return data['builds']
 
 """ Find the names of any tests which failed in the given build output URL. """
 def find_failing_tests(testReportApiJson, jobConsoleOutput):
+  global summary_mode
+  global error_count
   ret = set()
   try:
     data = load_url_data(testReportApiJson)
 
   except:
-    logging.error(" Could not open testReport, check " +
+    if not summary_mode:
+      logging.error(" Could not open testReport, check " +
         jobConsoleOutput + " for why it was reported failed")
+    error_count += 1
     return ret
 
   for suite in data['suites']:
@@ -130,7 +148,7 @@ def find_failing_tests(testReportApiJson, jobConsoleOutput):
       if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
         ret.add(cs['className'] + "." + cs['name'])
 
-  if len(ret) == 0:
+  if len(ret) == 0 and (not summary_mode):
     logging.info(" No failed tests in testReport, check " +
         jobConsoleOutput + " for why it was reported failed.")
   return ret
@@ -138,6 +156,7 @@ def find_failing_tests(testReportApiJson, jobConsoleOutput):
 """ Iterate runs of specfied job within num_prev_days and collect results """
 def find_flaky_tests(jenkins_url, job_name, num_prev_days):
   global numRunsToExamine
+  global summary_mode
   all_failing = dict()
   # First list all builds
   builds = list_builds(jenkins_url, job_name)
@@ -153,6 +172,7 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days):
   tnum = len(builds)
   num = len(failing_build_urls)
   numRunsToExamine = tnum
-  logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
+  if not summary_mode:
+    logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
       + ") that have failed tests in the past " + str(num_prev_days) + " days"
       + ((".", ", as listed below:\n")[num > 0]))
@@ -165,10 +185,12 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days):
     ts = float(failed_build_with_time[1]) / 1000.
     st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
-    logging.info("===>%s" % str(testReport) + " (" + st + ")")
+    if not summary_mode:
+      logging.info("===>%s" % str(testReport) + " (" + st + ")")
 
     failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
     if failing:
       for ftest in failing:
-        logging.info(" Failed test: %s" % ftest)
+        if not summary_mode:
+          logging.info(" Failed test: %s" % ftest)
         all_failing[ftest] = all_failing.get(ftest,0)+1
@@ -176,6 +198,7 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days):
 
 def main():
   global numRunsToExamine
+  global summary_mode
   logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
 
   # set up logger to write to stdout
@@ -189,16 +212,34 @@ def main():
   logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
       + "/job/" + opts.job_name + "")
 
+  if opts.num_failed_tests != -1:
+    summary_mode = True
+
   all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
       opts.num_prev_days)
   if len(all_failing) == 0:
     raise SystemExit(0)
-  logging.info("\nAmong " + str(numRunsToExamine) + " runs examined, all failed "
-      + "tests <#failedRuns: testName>:")
+
+  if summary_mode and opts.num_failed_tests < len(all_failing):
+    logging.info("\nAmong " + str(numRunsToExamine) +
+        " runs examined, top " + str(opts.num_failed_tests) +
+        " failed tests <#failedRuns: testName>:")
+  else:
+    logging.info("\nAmong " + str(numRunsToExamine) +
+        " runs examined, all failed tests <#failedRuns: testName>:")
 
   # print summary section: all failed tests sorted by how many times they failed
+  line_count = 0
   for tn in sorted(all_failing, key=all_failing.get, reverse=True):
     logging.info(" " + str(all_failing[tn])+ ": " + tn)
+    if summary_mode:
+      line_count += 1
+      if line_count == opts.num_failed_tests:
+        break
+
+  if summary_mode and error_count > 0:
+    logging.info("\n" + str(error_count) + " errors found, you may "
+        + "re-run in non summary mode to see error details.");
 
 if __name__ == "__main__":
   main()
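
A minimal, self-contained sketch (hypothetical test names and failure counts, not real Jenkins data) of the top-N selection that the loop added to main() performs in summary mode:

  # Failure counts per test, as find_flaky_tests() would accumulate them.
  all_failing = {"TestBalancer.testTwoNodes": 4,
                 "TestDFSShell.testCopyToLocal": 2,
                 "TestRPC.testSlowRpc": 1}  # hypothetical data
  num_failed_tests = 2  # value passed via the new -t/--top option

  # Sort by failure count (descending) and stop after the top N entries.
  line_count = 0
  for tn in sorted(all_failing, key=all_failing.get, reverse=True):
    print(" %d: %s" % (all_failing[tn], tn))
    line_count += 1
    if line_count == num_failed_tests:
      break

With -t left at its default of -1, summary mode stays off, line_count is never incremented, and the loop still prints every failed test as before the patch.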

hadoop-common-project/hadoop-common/CHANGES.txt

@@ -648,6 +648,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-11971. Move test utilities for tracing from hadoop-hdfs to
     hadoop-common. (Masatake Iwasaki via aajisaka)
 
+    HADOOP-11965. determine-flaky-tests needs a summary mode.
+    (Yufei Gu via Yongjun Zhang)
+
   OPTIMIZATIONS
 
     HADOOP-11785. Reduce the number of listStatus operation in distcp