From 74351af3b7521b194116258c96270ddaeccd8126 Mon Sep 17 00:00:00 2001 From: Yongjun Zhang Date: Wed, 17 Jun 2015 15:48:29 -0700 Subject: [PATCH] HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, --- dev-support/determine-flaky-tests-hadoop.py | 57 ++++++++++++++++--- .../hadoop-common/CHANGES.txt | 3 + 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py index ce152bacae..8644299bba 100755 --- a/dev-support/determine-flaky-tests-hadoop.py +++ b/dev-support/determine-flaky-tests-hadoop.py @@ -62,12 +62,19 @@ DEFAULT_JENKINS_URL = "https://builds.apache.org" DEFAULT_JOB_NAME = "Hadoop-Common-trunk" DEFAULT_NUM_PREVIOUS_DAYS = 14 +DEFAULT_TOP_NUM_FAILED_TEST = -1 SECONDS_PER_DAY = 86400 # total number of runs to examine numRunsToExamine = 0 +#summary mode +summary_mode = False + +#total number of errors +error_count = 0 + """ Parse arguments """ def parse_args(): parser = OptionParser() @@ -80,6 +87,10 @@ def parse_args(): parser.add_option("-n", "--num-days", type="int", dest="num_prev_days", help="Number of days to examine", default=DEFAULT_NUM_PREVIOUS_DAYS) + parser.add_option("-t", "--top", type="int", + dest="num_failed_tests", + help="Summary Mode, only show top number of failed tests", + default=DEFAULT_TOP_NUM_FAILED_TEST) (options, args) = parser.parse_args() if args: @@ -100,6 +111,7 @@ def load_url_data(url): """ List all builds of the target project. """ def list_builds(jenkins_url, job_name): + global summary_mode url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( jenkins=jenkins_url, job_name=job_name) @@ -108,19 +120,25 @@ def list_builds(jenkins_url, job_name): data = load_url_data(url) except: - logging.error("Could not fetch: %s" % url) + if not summary_mode: + logging.error("Could not fetch: %s" % url) + error_count += 1 raise return data['builds'] """ Find the names of any tests which failed in the given build output URL. """ def find_failing_tests(testReportApiJson, jobConsoleOutput): + global summary_mode + global error_count ret = set() try: data = load_url_data(testReportApiJson) except: - logging.error(" Could not open testReport, check " + + if not summary_mode: + logging.error(" Could not open testReport, check " + jobConsoleOutput + " for why it was reported failed") + error_count += 1 return ret for suite in data['suites']: @@ -130,7 +148,7 @@ def find_failing_tests(testReportApiJson, jobConsoleOutput): if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): ret.add(cs['className'] + "." + cs['name']) - if len(ret) == 0: + if len(ret) == 0 and (not summary_mode): logging.info(" No failed tests in testReport, check " + jobConsoleOutput + " for why it was reported failed.") return ret @@ -138,6 +156,7 @@ def find_failing_tests(testReportApiJson, jobConsoleOutput): """ Iterate runs of specfied job within num_prev_days and collect results """ def find_flaky_tests(jenkins_url, job_name, num_prev_days): global numRunsToExamine + global summary_mode all_failing = dict() # First list all builds builds = list_builds(jenkins_url, job_name) @@ -153,7 +172,8 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days): tnum = len(builds) num = len(failing_build_urls) numRunsToExamine = tnum - logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) + if not summary_mode: + logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) + ") that have failed tests in the past " + str(num_prev_days) + " days" + ((".", ", as listed below:\n")[num > 0])) @@ -165,17 +185,20 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days): ts = float(failed_build_with_time[1]) / 1000. st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') - logging.info("===>%s" % str(testReport) + " (" + st + ")") + if not summary_mode: + logging.info("===>%s" % str(testReport) + " (" + st + ")") failing = find_failing_tests(testReportApiJson, jobConsoleOutput) if failing: for ftest in failing: - logging.info(" Failed test: %s" % ftest) + if not summary_mode: + logging.info(" Failed test: %s" % ftest) all_failing[ftest] = all_failing.get(ftest,0)+1 return all_failing def main(): global numRunsToExamine + global summary_mode logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) # set up logger to write to stdout @@ -189,16 +212,34 @@ def main(): logging.info("****Recently FAILED builds in url: " + opts.jenkins_url + "/job/" + opts.job_name + "") + if opts.num_failed_tests != -1: + summary_mode = True + all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, opts.num_prev_days) if len(all_failing) == 0: raise SystemExit(0) - logging.info("\nAmong " + str(numRunsToExamine) + " runs examined, all failed " - + "tests <#failedRuns: testName>:") + + if summary_mode and opts.num_failed_tests < len(all_failing): + logging.info("\nAmong " + str(numRunsToExamine) + + " runs examined, top " + str(opts.num_failed_tests) + + " failed tests <#failedRuns: testName>:") + else: + logging.info("\nAmong " + str(numRunsToExamine) + + " runs examined, all failed tests <#failedRuns: testName>:") # print summary section: all failed tests sorted by how many times they failed + line_count = 0 for tn in sorted(all_failing, key=all_failing.get, reverse=True): logging.info(" " + str(all_failing[tn])+ ": " + tn) + if summary_mode: + line_count += 1 + if line_count == opts.num_failed_tests: + break + + if summary_mode and error_count > 0: + logging.info("\n" + str(error_count) + " errors found, you may " + + "re-run in non summary mode to see error details."); if __name__ == "__main__": main() diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 67e9c763a1..4b1b382539 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -648,6 +648,9 @@ Release 2.8.0 - UNRELEASED HADOOP-11971. Move test utilities for tracing from hadoop-hdfs to hadoop-common. (Masatake Iwasaki via aajisaka) + HADOOP-11965. determine-flaky-tests needs a summary mode. + (Yufei Gu via Yongjun Zhang) + OPTIMIZATIONS HADOOP-11785. Reduce the number of listStatus operation in distcp