hadoop/dev-support/bin/checkcompatibility.py

#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Script which checks Java API compatibility between two revisions of the
# Java client.
#
# Originally sourced from Apache Kudu, which was based on the
# compatibility checker from the Apache HBase project, but ported to
# Python for better readability.

import logging
import os
import re
import shutil
import subprocess
import sys
import urllib.request
import argparse

# Working directory of the process at the time this module is loaded.
# NOTE(review): none of the functions visible in this file read REPO_DIR;
# they locate the repo through get_repo_dir(), which is derived from this
# script's own path -- confirm whether this constant is still needed.
REPO_DIR = os.getcwd()

def check_output(*popenargs, **kwargs):
  """ Run command with arguments and return its output as a string.

  Thin wrapper around subprocess.check_output: every positional and keyword
  argument is forwarded unchanged. The output is decoded as UTF-8 unless the
  caller supplies its own 'encoding' keyword.

  Raises subprocess.CalledProcessError if the command exits non-zero.
  """
  # setdefault (rather than passing encoding= alongside **kwargs) lets a
  # caller override the encoding without triggering a duplicate-keyword
  # TypeError.
  kwargs.setdefault("encoding", "utf-8")
  return subprocess.check_output(*popenargs, **kwargs)


def get_repo_dir():
  """ Return the repo top: two levels above the directory holding this script.

  The result is the script's absolute directory joined with "../.."; it is
  not normalized.
  """
  script_dir = os.path.dirname(os.path.abspath(__file__))
  return os.path.join(script_dir, "../..")

def get_scratch_dir():
  """ Return the path to the scratch dir that we build within.

  The directory is <repo>/target/compat-check and is created (including any
  missing parents) if it does not exist yet.
  """
  scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
  # exist_ok avoids the check-then-create race of testing os.path.exists
  # first, and fails loudly if the path exists but is not a directory.
  os.makedirs(scratch_dir, exist_ok=True)
  return scratch_dir

def get_java_acc_dir():
  """ Return <repo>/target/java-acc, the location used for the Java API
  Compliance Checker checkout. The directory is not created here. """
  target_dir = os.path.join(get_repo_dir(), "target")
  return os.path.join(target_dir, "java-acc")


def clean_scratch_dir(scratch_dir):
  """ Delete 'scratch_dir' (if present) and create it again, empty. """
  already_present = os.path.exists(scratch_dir)
  if already_present:
    logging.info("Removing scratch dir %s...", scratch_dir)
    shutil.rmtree(scratch_dir)

  # At this point the path is gone, so a plain makedirs is sufficient.
  logging.info("Creating empty scratch dir %s...", scratch_dir)
  os.makedirs(scratch_dir)


def checkout_java_tree(rev, path):
  """ Check out the Java source tree for the given revision into
  the given path.

  'path' is created first (os.makedirs raises FileExistsError if it already
  exists), then `git archive` for 'rev' is piped into `tar` which unpacks
  into 'path'. Both commands run with the repo top as working directory.

  Raises subprocess.CalledProcessError if either side of the pipeline fails
  (bash runs with pipefail enabled).
  """
  logging.info("Checking out %s in %s", rev, path)
  os.makedirs(path)
  # Extract java source.
  # 'rev' and 'path' are handed to bash as positional parameters ($1, $2)
  # instead of being spliced into the script text, so quotes, '$' or
  # backticks in either value cannot break or alter the command.
  subprocess.check_call(["bash", "-o", "pipefail", "-c",
                         'git archive --format=tar "$1" | tar -C "$2" -xf -',
                         "checkout_java_tree",  # becomes $0 inside the script
                         rev, path],
                        cwd=get_repo_dir())

def get_git_hash(revname):
  """ Resolve 'revname' via `git rev-parse` (run at the repo top) and return
  the printed hash with surrounding whitespace removed. """
  rev_parse_cmd = ["git", "rev-parse", revname]
  raw_output = check_output(rev_parse_cmd, cwd=get_repo_dir())
  return raw_output.strip()

def get_repo_name():
  """Get the name of the repo based on the git remote.

  The name is the last path component of the first usable remote URL printed
  by `git remote -v`, with a trailing "/" and a ".git" suffix removed. If no
  remote yields a non-empty name (for example, the repo has no remotes), the
  name of the repo directory is returned instead and a warning is logged.
  """
  repo_dir = get_repo_dir()
  remote_lines = check_output(["git", "remote", "-v"],
                              cwd=repo_dir).splitlines()
  # Example output:
  # origin	https://github.com/apache/hadoop.git (fetch)
  # origin	https://github.com/apache/hadoop.git (push)
  for line in remote_lines:
    fields = line.split()
    if len(fields) < 2:
      # Blank or malformed line: no URL column to read.
      continue
    remote = fields[1].rstrip("/").split("/")[-1]
    if remote.endswith(".git"):
      remote = remote[:-4]
    if remote:
      return remote

  # This function is called only after both trees have been built, so fall
  # back to the directory name rather than crash with an IndexError.
  fallback = os.path.basename(os.path.abspath(repo_dir))
  logging.warning("Could not determine the repo name from git remotes; "
                  "using directory name %s", fallback)
  return fallback

def build_tree(java_path):
  """ Run `mvn package` inside 'java_path' with tests and javadoc skipped.

  Raises subprocess.CalledProcessError if Maven exits non-zero.
  """
  logging.info("Building in %s...", java_path)
  mvn_cmd = ["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true", "package"]
  subprocess.check_call(mvn_cmd, cwd=java_path)


def checkout_java_acc(force):
  """
  Check out the Java API Compliance Checker. If 'force' is true, will
  re-download even if the directory exists.

  The release tarball is downloaded into the scratch dir, unpacked there with
  `tar`, and the unpacked directory is moved to the path returned by
  get_java_acc_dir().

  Raises subprocess.CalledProcessError if `tar` fails; errors from the
  download itself propagate from urllib.
  """
  acc_dir = get_java_acc_dir()
  if os.path.exists(acc_dir):
    logging.info("Java ACC is already downloaded.")
    if not force:
      return
    logging.info("Forcing re-download.")
    shutil.rmtree(acc_dir)

  logging.info("Downloading Java ACC...")

  # Single source of truth for the version: it appears both in the download
  # URL and in the name of the directory the tarball unpacks to.
  version = "1.8"
  url = ("https://github.com/lvc/japi-compliance-checker/archive/%s.tar.gz"
         % version)
  scratch_dir = get_scratch_dir()
  path = os.path.join(scratch_dir, os.path.basename(url))
  # Stream the response to disk and make sure both the HTTP response and the
  # output file are closed, even if the transfer fails part-way.
  with urllib.request.urlopen(url) as response, open(path, 'wb') as w:
    shutil.copyfileobj(response, w)

  subprocess.check_call(["tar", "xzf", path],
                        cwd=scratch_dir)

  shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-" + version),
              acc_dir)


def find_jars(path):
  """ Return a list of jars within 'path' to be checked for compatibility.

  Runs `find <path> -name "*.jar"` and drops every result whose full path
  contains "-tests", "-sources" or "-with-dependencies". The list is sorted
  so that logs and generated manifests are identical from run to run.

  Raises subprocess.CalledProcessError if `find` exits non-zero.
  """
  all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())

  excluded_markers = ("-tests", "-sources", "-with-dependencies")
  # Iterating a set of strings has no stable order across interpreter runs,
  # so sort before returning.
  return sorted(j for j in all_jars
                if not any(marker in j for marker in excluded_markers))

def write_xml_file(path, version, jars):
  """Write the JACC descriptor for one revision to 'path'.

  The file holds a <version> element followed by an <archives> element that
  lists each entry of 'jars' on its own line. Values are written verbatim,
  without XML escaping.
  """
  with open(path, "wt") as manifest:
    version_line = "<version>" + version + "</version>\n"
    manifest.writelines([version_line, "<archives>"])
    manifest.writelines(jar + "\n" for jar in jars)
    manifest.write("</archives>")

def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
  """ Run the compliance checker to compare 'src' and 'dst'.

  Writes one descriptor file per side (src.xml / dst.xml) into the scratch
  dir, then invokes japi-compliance-checker.pl through perl with the report
  path set to report.html in the scratch dir.

  src_name, dst_name: version labels written into the descriptors.
  src_jars, dst_jars: lists of JAR paths for each side.
  annotations: list of annotation names, or None. When not None, the names
    are written one per line to annotations.txt in the scratch dir and the
    file is passed to the checker with -annotations-list.

  Raises subprocess.CalledProcessError if the checker exits non-zero.
  """
  logging.info("Will check compatibility between original jars:\n\t%s\n" +
               "and new jars:\n\t%s",
               "\n\t".join(src_jars),
               "\n\t".join(dst_jars))

  java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl")

  # Resolve the scratch dir once; every generated file lives in it.
  scratch_dir = get_scratch_dir()

  src_xml_path = os.path.join(scratch_dir, "src.xml")
  dst_xml_path = os.path.join(scratch_dir, "dst.xml")
  write_xml_file(src_xml_path, src_name, src_jars)
  write_xml_file(dst_xml_path, dst_name, dst_jars)

  out_path = os.path.join(scratch_dir, "report.html")

  args = ["perl", java_acc_path,
          "-l", get_repo_name(),
          "-d1", src_xml_path,
          "-d2", dst_xml_path,
          "-report-path", out_path]

  if annotations is not None:
    annotations_path = os.path.join(scratch_dir, "annotations.txt")
    # open(), not the Python 2-only file() builtin, which is a NameError on
    # Python 3.
    with open(annotations_path, "w") as f:
      for ann in annotations:
        print(ann, file=f)
    args += ["-annotations-list", annotations_path]

  subprocess.check_call(args)

def filter_jars(jars, include_filters, exclude_filters):
  """Return the JARs that pass the include filters and then the exclude filters.

  Filters are compiled regexes applied with match() to each JAR's basename.
  A JAR is kept when at least one include filter matches and no exclude
  filter matches. Input order is preserved; dropped JARs are logged at
  debug level.
  """
  # Pass 1: keep only JARs accepted by some include filter.
  included = []
  for jar in jars:
    name = os.path.basename(jar)
    if any(pattern.match(name) for pattern in include_filters):
      included.append(jar)
    else:
      logging.debug("Ignoring JAR %s", jar)

  # Pass 2: drop JARs rejected by some exclude filter.
  kept = []
  for jar in included:
    name = os.path.basename(jar)
    if any(pattern.match(name) for pattern in exclude_filters):
      logging.debug("Ignoring JAR %s", jar)
    else:
      kept.append(jar)

  return kept


def main():
  """Command-line entry point.

  Parses the options, fetches the Java API Compliance Checker, optionally
  re-creates the scratch dir and checks out both revisions into it,
  optionally builds both trees, collects and filters the built JARs, and
  finally runs the checker on the two JAR sets.

  Exits with status 1 when invoked without any arguments (after printing the
  help text) or when either side ends up with no JARs after filtering.
  """
  logging.basicConfig(level=logging.INFO)

  cli = argparse.ArgumentParser(description="Run Java API Compliance Checker.")
  cli.add_argument(
      "-f", "--force-download",
      action="store_true",
      help=("Download dependencies (i.e. Java JAVA_ACC) "
            "even if they are already present"))
  cli.add_argument(
      "-i", "--include-file",
      action="append",
      dest="include_files",
      help=("Regex filter for JAR files to be included. "
            "Applied before the exclude filters. "
            "Can be specified multiple times."))
  cli.add_argument(
      "-e", "--exclude-file",
      action="append",
      dest="exclude_files",
      help=("Regex filter for JAR files to be excluded. "
            "Applied after the include filters. "
            "Can be specified multiple times."))
  cli.add_argument(
      "-a", "--annotation",
      action="append",
      dest="annotations",
      help=("Fully-qualified Java annotation. "
            "Java ACC will only check compatibility of "
            "annotated classes. Can be specified multiple times."))
  cli.add_argument(
      "--skip-clean",
      action="store_true",
      help="Skip cleaning the scratch directory.")
  cli.add_argument(
      "--skip-build",
      action="store_true",
      help="Skip building the projects.")
  cli.add_argument("src_rev", nargs=1, type=str, help="Source revision.")
  cli.add_argument(
      "dst_rev", nargs="?", type=str, default="HEAD",
      help=("Destination revision. "
            "If not specified, will use HEAD."))

  # A bare invocation gets the help text instead of an argparse usage error.
  if len(sys.argv) == 1:
    cli.print_help()
    sys.exit(1)

  args = cli.parse_args()

  # nargs=1 wraps the source revision in a one-element list.
  src_rev = args.src_rev[0]
  dst_rev = args.dst_rev

  logging.info("Source revision: %s", src_rev)
  logging.info("Destination revision: %s", dst_rev)

  # Compile the JAR filename patterns; with no include pattern, accept all.
  if args.include_files is None:
    include_filters = [re.compile(".*")]
  else:
    include_filters = []
    for pattern in args.include_files:
      logging.info("Applying JAR filename include filter: %s", pattern)
      include_filters.append(re.compile(pattern))

  exclude_filters = []
  for pattern in args.exclude_files or []:
    logging.info("Applying JAR filename exclude filter: %s", pattern)
    exclude_filters.append(re.compile(pattern))

  # Report the annotation list, if one was given.
  annotations = args.annotations
  if annotations is not None:
    logging.info("Filtering classes using %d annotation(s):", len(annotations))
    for annotation in annotations:
      logging.info("\t%s", annotation)

  # Fetch the checker itself.
  checkout_java_acc(args.force_download)

  # Lay out the two source trees under the scratch dir.
  scratch_dir = get_scratch_dir()
  src_dir = os.path.join(scratch_dir, "src")
  dst_dir = os.path.join(scratch_dir, "dst")

  if not args.skip_clean:
    clean_scratch_dir(scratch_dir)
    # Fresh checkouts of both revisions.
    checkout_java_tree(get_git_hash(src_rev), src_dir)
    checkout_java_tree(get_git_hash(dst_rev), dst_dir)
  else:
    logging.info("Skipping cleaning the scratch directory")

  # Build source first, then destination.
  if not args.skip_build:
    for tree in (src_dir, dst_dir):
      build_tree(tree)
  else:
    logging.info("Skipping the build")

  # Collect the JARs from both trees, then apply the filters to each set.
  found_src = find_jars(src_dir)
  found_dst = find_jars(dst_dir)
  src_jars = filter_jars(found_src, include_filters, exclude_filters)
  dst_jars = filter_jars(found_dst, include_filters, exclude_filters)

  if not src_jars or not dst_jars:
    logging.error("No JARs found! Are your filters too strong?")
    sys.exit(1)

  run_java_acc(src_rev, src_jars, dst_rev, dst_jars, annotations)


# Run the checker only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
  main()