From b9b49ed956e6fa9b55758f3d2c1b92ae2597cdbb Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Sat, 20 Feb 2021 17:18:23 +0900 Subject: [PATCH] HADOOP-16748. Migrate to Python 3 and upgrade Yetus to 0.13.0 (#1738) - Upgrade Yetus to 0.13.0 to support Python 3 for the release scripts. - Removed determine-flaky-tests-hadoop.py. - Temporarily disabled shelldocs check due to YETUS-1099. Reviewed-by: Inigo Goiri Reviewed-by: Mingliang Liu --- dev-support/Jenkinsfile | 23 +- dev-support/bin/checkcompatibility.py | 37 +-- dev-support/bin/yetus-wrapper | 2 +- dev-support/determine-flaky-tests-hadoop.py | 245 -------------------- dev-support/docker/Dockerfile | 35 +-- dev-support/docker/Dockerfile_aarch64 | 35 +-- 6 files changed, 40 insertions(+), 337 deletions(-) delete mode 100755 dev-support/determine-flaky-tests-hadoop.py diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 1703d0153b..6841ed30a7 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -35,7 +35,7 @@ pipeline { DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile" YETUS='yetus' // Branch or tag name. Yetus release tags are 'rel/X.Y.Z' - YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce' + YETUS_VERSION='rel/0.13.0' } parameters { @@ -60,7 +60,7 @@ pipeline { stage ('precommit-run') { steps { withCredentials( - [usernamePassword(credentialsId: 'apache-hadoop-at-github.com', + [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd', passwordVariable: 'GITHUB_TOKEN', usernameVariable: 'GITHUB_USER'), usernamePassword(credentialsId: 'hadoopqa-at-asf-jira', @@ -133,9 +133,6 @@ pipeline { # plugins to enable YETUS_ARGS+=("--plugins=all") - # use Hadoop's bundled shelldocs - YETUS_ARGS+=("--shelldocs=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/shelldocs") - # don't let these tests cause -1s because we aren't really paying that # much attention to them YETUS_ARGS+=("--tests-filter=checkstyle") @@ -152,9 +149,6 @@ pipeline { # help keep the ASF boxes clean YETUS_ARGS+=("--sentinel") - # use emoji vote so it is easier to find the broken line - YETUS_ARGS+=("--github-use-emoji-vote") - # test with Java 8 and 11 YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64") YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64") @@ -174,6 +168,19 @@ pipeline { post { always { script { + // Publish status if it was missed (YETUS-1059) + withCredentials( + [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd', + passwordVariable: 'GITHUB_TOKEN', + usernameVariable: 'GITHUB_USER')]) { + sh '''#!/usr/bin/env bash + YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}") + YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}") + TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh" + /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true + ''' + } + // Yetus output archiveArtifacts "${env.PATCHDIR}/**" // Publish the HTML report so that it can be looked at diff --git a/dev-support/bin/checkcompatibility.py b/dev-support/bin/checkcompatibility.py index ad1e9cbe47..3db36154ef 100755 --- a/dev-support/bin/checkcompatibility.py +++ b/dev-support/bin/checkcompatibility.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -30,33 +30,16 @@ import shutil import subprocess import sys -import urllib2 -try: - import argparse -except ImportError: - sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.") - sys.exit(2) +import urllib.request +import argparse # Various relative paths REPO_DIR = os.getcwd() def check_output(*popenargs, **kwargs): - r"""Run command with arguments and return its output as a byte string. - Backported from Python 2.7 as it's implemented as pure python on stdlib. - >>> check_output(['/usr/bin/python', '--version']) - Python 2.6.2 - """ - process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) - output, _ = process.communicate() - retcode = process.poll() - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - error = subprocess.CalledProcessError(retcode, cmd) - error.output = output - raise error - return output + """ Run command with arguments and return its output as a string. """ + return subprocess.check_output(*popenargs, **kwargs, encoding='utf-8') + def get_repo_dir(): """ Return the path to the top of the repo. """ @@ -139,7 +122,7 @@ def checkout_java_acc(force): url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz" scratch_dir = get_scratch_dir() path = os.path.join(scratch_dir, os.path.basename(url)) - jacc = urllib2.urlopen(url) + jacc = urllib.request.urlopen(url) with open(path, 'wb') as w: w.write(jacc.read()) @@ -194,7 +177,7 @@ def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations): annotations_path = os.path.join(get_scratch_dir(), "annotations.txt") with file(annotations_path, "w") as f: for ann in annotations: - print >>f, ann + print(ann, file=f) args += ["-annotations-list", annotations_path] subprocess.check_call(args) @@ -264,8 +247,8 @@ def main(): parser.add_argument("--skip-build", action="store_true", help="Skip building the projects.") - parser.add_argument("src_rev", nargs=1, help="Source revision.") - parser.add_argument("dst_rev", nargs="?", default="HEAD", + parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.") + parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD", help="Destination revision. " + "If not specified, will use HEAD.") diff --git a/dev-support/bin/yetus-wrapper b/dev-support/bin/yetus-wrapper index bca2316ae6..8532d17497 100755 --- a/dev-support/bin/yetus-wrapper +++ b/dev-support/bin/yetus-wrapper @@ -77,7 +77,7 @@ WANTED="$1" shift ARGV=("$@") -HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.10.0} +HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.13.0} BIN=$(yetus_abs "${BASH_SOURCE-$0}") BINDIR=$(dirname "${BIN}") diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py deleted file mode 100755 index 8644299bba..0000000000 --- a/dev-support/determine-flaky-tests-hadoop.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Given a jenkins test job, this script examines all runs of the job done -# within specified period of time (number of days prior to the execution -# time of this script), and reports all failed tests. -# -# The output of this script includes a section for each run that has failed -# tests, with each failed test name listed. -# -# More importantly, at the end, it outputs a summary section to list all failed -# tests within all examined runs, and indicate how many runs a same test -# failed, and sorted all failed tests by how many runs each test failed. -# -# This way, when we see failed tests in PreCommit build, we can quickly tell -# whether a failed test is a new failure, or it failed before and how often it -# failed, so to have idea whether it may just be a flaky test. -# -# Of course, to be 100% sure about the reason of a test failure, closer look -# at the failed test for the specific run is necessary. -# -import sys -import platform -sysversion = sys.hexversion -onward30 = False -if sysversion < 0x020600F0: - sys.exit("Minimum supported python version is 2.6, the current version is " + - "Python" + platform.python_version()) - -if sysversion == 0x030000F0: - sys.exit("There is a known bug with Python" + platform.python_version() + - ", please try a different version"); - -if sysversion < 0x03000000: - import urllib2 -else: - onward30 = True - import urllib.request - -import datetime -import json as simplejson -import logging -from optparse import OptionParser -import time - -# Configuration -DEFAULT_JENKINS_URL = "https://builds.apache.org" -DEFAULT_JOB_NAME = "Hadoop-Common-trunk" -DEFAULT_NUM_PREVIOUS_DAYS = 14 -DEFAULT_TOP_NUM_FAILED_TEST = -1 - -SECONDS_PER_DAY = 86400 - -# total number of runs to examine -numRunsToExamine = 0 - -#summary mode -summary_mode = False - -#total number of errors -error_count = 0 - -""" Parse arguments """ -def parse_args(): - parser = OptionParser() - parser.add_option("-J", "--jenkins-url", type="string", - dest="jenkins_url", help="Jenkins URL", - default=DEFAULT_JENKINS_URL) - parser.add_option("-j", "--job-name", type="string", - dest="job_name", help="Job name to look at", - default=DEFAULT_JOB_NAME) - parser.add_option("-n", "--num-days", type="int", - dest="num_prev_days", help="Number of days to examine", - default=DEFAULT_NUM_PREVIOUS_DAYS) - parser.add_option("-t", "--top", type="int", - dest="num_failed_tests", - help="Summary Mode, only show top number of failed tests", - default=DEFAULT_TOP_NUM_FAILED_TEST) - - (options, args) = parser.parse_args() - if args: - parser.error("unexpected arguments: " + repr(args)) - return options - -""" Load data from specified url """ -def load_url_data(url): - if onward30: - ourl = urllib.request.urlopen(url) - codec = ourl.info().get_param('charset') - content = ourl.read().decode(codec) - data = simplejson.loads(content, strict=False) - else: - ourl = urllib2.urlopen(url) - data = simplejson.load(ourl, strict=False) - return data - -""" List all builds of the target project. """ -def list_builds(jenkins_url, job_name): - global summary_mode - url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( - jenkins=jenkins_url, - job_name=job_name) - - try: - data = load_url_data(url) - - except: - if not summary_mode: - logging.error("Could not fetch: %s" % url) - error_count += 1 - raise - return data['builds'] - -""" Find the names of any tests which failed in the given build output URL. """ -def find_failing_tests(testReportApiJson, jobConsoleOutput): - global summary_mode - global error_count - ret = set() - try: - data = load_url_data(testReportApiJson) - - except: - if not summary_mode: - logging.error(" Could not open testReport, check " + - jobConsoleOutput + " for why it was reported failed") - error_count += 1 - return ret - - for suite in data['suites']: - for cs in suite['cases']: - status = cs['status'] - errDetails = cs['errorDetails'] - if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): - ret.add(cs['className'] + "." + cs['name']) - - if len(ret) == 0 and (not summary_mode): - logging.info(" No failed tests in testReport, check " + - jobConsoleOutput + " for why it was reported failed.") - return ret - -""" Iterate runs of specfied job within num_prev_days and collect results """ -def find_flaky_tests(jenkins_url, job_name, num_prev_days): - global numRunsToExamine - global summary_mode - all_failing = dict() - # First list all builds - builds = list_builds(jenkins_url, job_name) - - # Select only those in the last N days - min_time = int(time.time()) - SECONDS_PER_DAY * num_prev_days - builds = [b for b in builds if (int(b['timestamp']) / 1000) > min_time] - - # Filter out only those that failed - failing_build_urls = [(b['url'] , b['timestamp']) for b in builds - if (b['result'] in ('UNSTABLE', 'FAILURE'))] - - tnum = len(builds) - num = len(failing_build_urls) - numRunsToExamine = tnum - if not summary_mode: - logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) - + ") that have failed tests in the past " + str(num_prev_days) + " days" - + ((".", ", as listed below:\n")[num > 0])) - - for failed_build_with_time in failing_build_urls: - failed_build = failed_build_with_time[0]; - jobConsoleOutput = failed_build + "Console"; - testReport = failed_build + "testReport"; - testReportApiJson = testReport + "/api/json"; - - ts = float(failed_build_with_time[1]) / 1000. - st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') - if not summary_mode: - logging.info("===>%s" % str(testReport) + " (" + st + ")") - failing = find_failing_tests(testReportApiJson, jobConsoleOutput) - if failing: - for ftest in failing: - if not summary_mode: - logging.info(" Failed test: %s" % ftest) - all_failing[ftest] = all_failing.get(ftest,0)+1 - - return all_failing - -def main(): - global numRunsToExamine - global summary_mode - logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) - - # set up logger to write to stdout - soh = logging.StreamHandler(sys.stdout) - soh.setLevel(logging.INFO) - logger = logging.getLogger() - logger.removeHandler(logger.handlers[0]) - logger.addHandler(soh) - - opts = parse_args() - logging.info("****Recently FAILED builds in url: " + opts.jenkins_url - + "/job/" + opts.job_name + "") - - if opts.num_failed_tests != -1: - summary_mode = True - - all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, - opts.num_prev_days) - if len(all_failing) == 0: - raise SystemExit(0) - - if summary_mode and opts.num_failed_tests < len(all_failing): - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, top " + str(opts.num_failed_tests) + - " failed tests <#failedRuns: testName>:") - else: - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, all failed tests <#failedRuns: testName>:") - - # print summary section: all failed tests sorted by how many times they failed - line_count = 0 - for tn in sorted(all_failing, key=all_failing.get, reverse=True): - logging.info(" " + str(all_failing[tn])+ ": " + tn) - if summary_mode: - line_count += 1 - if line_count == opts.num_failed_tests: - break - - if summary_mode and error_count > 0: - logging.info("\n" + str(error_count) + " errors found, you may " - + "re-run in non summary mode to see error details."); - -if __name__ == "__main__": - main() diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile index cf442902d3..930d44a839 100644 --- a/dev-support/docker/Dockerfile +++ b/dev-support/docker/Dockerfile @@ -72,10 +72,11 @@ RUN apt-get -q update \ openjdk-8-jdk \ pinentry-curses \ pkg-config \ - python \ - python2.7 \ - python-pkg-resources \ - python-setuptools \ + python3 \ + python3-pip \ + python3-pkg-resources \ + python3-setuptools \ + python3-wheel \ rsync \ shellcheck \ software-properties-common \ @@ -108,18 +109,6 @@ RUN mkdir -p /opt/boost-library \ && cd /root \ && rm -rf /opt/boost-library -#### -# Install pip (deprecated from Focal toolchain) -#### -# hadolint ignore=DL3003 -RUN mkdir -p /opt/pip \ - && curl -L https://bootstrap.pypa.io/2.7/get-pip.py > get-pip.py \ - && mv get-pip.py /opt/pip \ - && cd /opt/pip \ - && python2.7 get-pip.py "pip < 21.0" \ - && cd /root \ - && rm -rf /opt/pip - ###### # Install Google Protobuf 3.7.1 (3.6.1 ships with Focal) ###### @@ -139,19 +128,9 @@ ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" #### -# Install pylint at fixed version (2.0.0 removed python2 support) -# https://github.com/PyCQA/pylint/issues/2294 +# Install pylint and python-dateutil #### -RUN pip2 install \ - astroid==1.6.6 \ - isort==4.3.21 \ - configparser==4.0.2 \ - pylint==1.9.2 - -#### -# Install dateutil.parser -#### -RUN pip2 install python-dateutil==2.7.3 +RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1 #### # Install bower diff --git a/dev-support/docker/Dockerfile_aarch64 b/dev-support/docker/Dockerfile_aarch64 index bdf9e0c7e5..858c9b36dd 100644 --- a/dev-support/docker/Dockerfile_aarch64 +++ b/dev-support/docker/Dockerfile_aarch64 @@ -76,10 +76,11 @@ RUN apt-get -q update \ openjdk-8-jdk \ pinentry-curses \ pkg-config \ - python \ - python2.7 \ - python-pkg-resources \ - python-setuptools \ + python3 \ + python3-pip \ + python3-pkg-resources \ + python3-setuptools \ + python3-wheel \ rsync \ shellcheck \ software-properties-common \ @@ -112,18 +113,6 @@ RUN mkdir -p /opt/boost-library \ && cd /root \ && rm -rf /opt/boost-library -#### -# Install pip (deprecated from Focal toolchain) -#### -# hadolint ignore=DL3003 -RUN mkdir -p /opt/pip \ - && curl -L https://bootstrap.pypa.io/2.7/get-pip.py > get-pip.py \ - && mv get-pip.py /opt/pip \ - && cd /opt/pip \ - && python2.7 get-pip.py "pip < 21.0" \ - && cd /root \ - && rm -rf /opt/pip - ###### # Install Google Protobuf 3.7.1 (3.6.1 ships with Focal) ###### @@ -143,19 +132,9 @@ ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" #### -# Install pylint at fixed version (2.0.0 removed python2 support) -# https://github.com/PyCQA/pylint/issues/2294 +# Install pylint and python-dateutil #### -RUN pip2 install \ - astroid==1.6.6 \ - isort==4.3.21 \ - configparser==4.0.2 \ - pylint==1.9.2 - -#### -# Install dateutil.parser -#### -RUN pip2 install python-dateutil==2.7.3 +RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1 #### # Install bower