HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu.

Yongjun Zhang 2015-06-17 15:48:29 -07:00
parent cc432885ad
commit 74351af3b7
2 changed files with 52 additions and 8 deletions

dev-support/determine-flaky-tests-hadoop.py

@@ -62,12 +62,19 @@
 DEFAULT_JENKINS_URL = "https://builds.apache.org"
 DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
 DEFAULT_NUM_PREVIOUS_DAYS = 14
+DEFAULT_TOP_NUM_FAILED_TEST = -1
 
 SECONDS_PER_DAY = 86400
 
 # total number of runs to examine
 numRunsToExamine = 0
 
+#summary mode
+summary_mode = False
+
+#total number of errors
+error_count = 0
+
 """ Parse arguments """
 def parse_args():
   parser = OptionParser()
@@ -80,6 +87,10 @@ def parse_args():
   parser.add_option("-n", "--num-days", type="int",
                     dest="num_prev_days", help="Number of days to examine",
                     default=DEFAULT_NUM_PREVIOUS_DAYS)
+  parser.add_option("-t", "--top", type="int",
+                    dest="num_failed_tests",
+                    help="Summary Mode, only show top number of failed tests",
+                    default=DEFAULT_TOP_NUM_FAILED_TEST)
 
   (options, args) = parser.parse_args()
   if args:
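
A minimal sketch (not part of the patch) of how the new -t/--top option behaves on its own; the flag name, dest, and default mirror the hunk above, while the value 10 is purely illustrative:

  from optparse import OptionParser

  parser = OptionParser()
  parser.add_option("-t", "--top", type="int",
                    dest="num_failed_tests",
                    help="Summary Mode, only show top number of failed tests",
                    default=-1)
  # Simulate running the script with "-t 10" on the command line.
  (opts, args) = parser.parse_args(["-t", "10"])
  print(opts.num_failed_tests)  # 10; the default of -1 leaves summary mode off
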
@@ -100,6 +111,7 @@ def load_url_data(url):
 
 """ List all builds of the target project. """
 def list_builds(jenkins_url, job_name):
+  global summary_mode
   url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
       jenkins=jenkins_url,
       job_name=job_name)
@@ -108,19 +120,25 @@ def list_builds(jenkins_url, job_name):
     data = load_url_data(url)
 
   except:
-    logging.error("Could not fetch: %s" % url)
+    if not summary_mode:
+      logging.error("Could not fetch: %s" % url)
+    error_count += 1
     raise
   return data['builds']
 
 """ Find the names of any tests which failed in the given build output URL. """
 def find_failing_tests(testReportApiJson, jobConsoleOutput):
+  global summary_mode
+  global error_count
   ret = set()
   try:
     data = load_url_data(testReportApiJson)
 
   except:
-    logging.error(" Could not open testReport, check " +
+    if not summary_mode:
+      logging.error(" Could not open testReport, check " +
         jobConsoleOutput + " for why it was reported failed")
+    error_count += 1
     return ret
 
   for suite in data['suites']:
@@ -130,7 +148,7 @@ def find_failing_tests(testReportApiJson, jobConsoleOutput):
       if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
         ret.add(cs['className'] + "." + cs['name'])
 
-  if len(ret) == 0:
+  if len(ret) == 0 and (not summary_mode):
     logging.info(" No failed tests in testReport, check " +
         jobConsoleOutput + " for why it was reported failed.")
   return ret
@@ -138,6 +156,7 @@ def find_failing_tests(testReportApiJson, jobConsoleOutput):
 """ Iterate runs of specfied job within num_prev_days and collect results """
 def find_flaky_tests(jenkins_url, job_name, num_prev_days):
   global numRunsToExamine
+  global summary_mode
   all_failing = dict()
   # First list all builds
   builds = list_builds(jenkins_url, job_name)
@@ -153,6 +172,7 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days):
   tnum = len(builds)
   num = len(failing_build_urls)
   numRunsToExamine = tnum
-  logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
+  if not summary_mode:
+    logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
       + ") that have failed tests in the past " + str(num_prev_days) + " days"
       + ((".", ", as listed below:\n")[num > 0]))
@@ -165,10 +185,12 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days):
     ts = float(failed_build_with_time[1]) / 1000.
     st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
-    logging.info("===>%s" % str(testReport) + " (" + st + ")")
+    if not summary_mode:
+      logging.info("===>%s" % str(testReport) + " (" + st + ")")
 
     failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
     if failing:
       for ftest in failing:
-        logging.info(" Failed test: %s" % ftest)
+        if not summary_mode:
+          logging.info(" Failed test: %s" % ftest)
         all_failing[ftest] = all_failing.get(ftest,0)+1
@@ -176,6 +198,7 @@ def find_flaky_tests(jenkins_url, job_name, num_prev_days):
 
 def main():
   global numRunsToExamine
+  global summary_mode
   logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
 
   # set up logger to write to stdout
@@ -189,16 +212,34 @@ def main():
   logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
       + "/job/" + opts.job_name + "")
 
+  if opts.num_failed_tests != -1:
+    summary_mode = True
+
   all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
       opts.num_prev_days)
   if len(all_failing) == 0:
     raise SystemExit(0)
-  logging.info("\nAmong " + str(numRunsToExamine) + " runs examined, all failed "
-      + "tests <#failedRuns: testName>:")
+
+  if summary_mode and opts.num_failed_tests < len(all_failing):
+    logging.info("\nAmong " + str(numRunsToExamine) +
+        " runs examined, top " + str(opts.num_failed_tests) +
+        " failed tests <#failedRuns: testName>:")
+  else:
+    logging.info("\nAmong " + str(numRunsToExamine) +
+        " runs examined, all failed tests <#failedRuns: testName>:")
 
   # print summary section: all failed tests sorted by how many times they failed
+  line_count = 0
   for tn in sorted(all_failing, key=all_failing.get, reverse=True):
     logging.info(" " + str(all_failing[tn])+ ": " + tn)
+    if summary_mode:
+      line_count += 1
+      if line_count == opts.num_failed_tests:
+        break
+
+  if summary_mode and error_count > 0:
+    logging.info("\n" + str(error_count) + " errors found, you may "
+        + "re-run in non summary mode to see error details.");
 
 if __name__ == "__main__":
   main()
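
A minimal, self-contained sketch (hypothetical test names and failure counts, not real Jenkins data) of the top-N selection that the loop added to main() performs in summary mode:

  # Failure counts per test, as find_flaky_tests() would accumulate them.
  all_failing = {"TestBalancer.testTwoNodes": 4,
                 "TestDFSShell.testCopyToLocal": 2,
                 "TestRPC.testSlowRpc": 1}  # hypothetical data
  num_failed_tests = 2  # value passed via the new -t/--top option

  # Sort by failure count (descending) and stop after the top N entries.
  line_count = 0
  for tn in sorted(all_failing, key=all_failing.get, reverse=True):
    print(" %d: %s" % (all_failing[tn], tn))
    line_count += 1
    if line_count == num_failed_tests:
      break

With -t left at its default of -1, summary mode stays off, line_count is never incremented, and the loop still prints every failed test as before the patch.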

hadoop-common-project/hadoop-common/CHANGES.txt

@@ -648,6 +648,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-11971. Move test utilities for tracing from hadoop-hdfs to
     hadoop-common. (Masatake Iwasaki via aajisaka)
 
+    HADOOP-11965. determine-flaky-tests needs a summary mode.
+    (Yufei Gu via Yongjun Zhang)
+
   OPTIMIZATIONS
 
     HADOOP-11785. Reduce the number of listStatus operation in distcp