diff --git a/dev-support/docker/README.md b/dev-support/docker/README.md index 32269c0929..4419b6c06f 100644 --- a/dev-support/docker/README.md +++ b/dev-support/docker/README.md @@ -26,9 +26,11 @@ the other. Different platforms have different toolchains. Some packages tend to across platforms and most commonly, a package that's readily available in one platform's toolchain isn't available on another. We thus, resort to building and installing the package from source, causing duplication of code since this needs to be done for all the Dockerfiles pertaining to all -the platforms. We need a system to track a dependency - for a package - for a platform. Thus, -there's a lot of diversity that needs to be handled for managing package dependencies and -`pkg-resolver` caters to that. +the platforms. We need a system to track a dependency - for a package - for a platform + +- (and optionally) for a release. Thus, there's a lot of diversity that needs to be handled for + managing package dependencies and + `pkg-resolver` caters to that. ## Supported platforms @@ -53,6 +55,21 @@ there's a lot of diversity that needs to be handled for managing package depende "package_2", "package_3" ] + }, + "dependency_3": { + "platform_1": { + "release_1": "package_1_1_1", + "release_2": [ + "package_1_2_1", + "package_1_2_2" + ] + }, + "platform_2": [ + "package_2_1", + { + "release_1": "package_2_1_1" + } + ] } } ``` @@ -65,6 +82,29 @@ how to interpret the above JSON - 2. For `dependency_2`, `package_1` and `package_2` needs to be installed for `platform_2`. 3. For `dependency_2`, `package_1`, `package_3` and `package_3` needs to be installed for `platform_1`. +4. For `dependency_3`, `package_1_1_1` gets installed only if `release_1` has been specified + for `platform_1`. +5. For `dependency_3`, the packages `package_1_2_1` and `package_1_2_2` gets installed only + if `release_2` has been specified for `platform_1`. +6. 
For `dependency_3`, for `platform_2`, `package_2_1` is always installed, but `package_2_1_1` gets + installed only if `release_1` has been specified. + +### Tool help + +```shell +$ pkg-resolver/resolve.py -h +usage: resolve.py [-h] [-r RELEASE] platform + +Platform package dependency resolver for building Apache Hadoop + +positional arguments: + platform The name of the platform to resolve the dependencies for + +optional arguments: + -h, --help show this help message and exit + -r RELEASE, --release RELEASE + The release label to filter the packages for the given platform +``` ## Standalone packages diff --git a/dev-support/docker/pkg-resolver/resolve.py b/dev-support/docker/pkg-resolver/resolve.py index 5d62edd323..bf3b8491f9 100644 --- a/dev-support/docker/pkg-resolver/resolve.py +++ b/dev-support/docker/pkg-resolver/resolve.py @@ -20,26 +20,55 @@ Platform package dependency resolver for building Apache Hadoop. """ +import argparse import json import sys from check_platform import is_supported_platform -def get_packages(platform): +def get_packages(platform, release=None): """ Resolve and get the list of packages to install for the given platform. :param platform: The platform for which the packages needs to be resolved. + :param release: An optional parameter that filters the packages of the given platform for the + specified release. :return: A list of resolved packages to install. """ with open('pkg-resolver/packages.json', encoding='utf-8', mode='r') as pkg_file: pkgs = json.loads(pkg_file.read()) packages = [] - for platforms in filter(lambda x: x.get(platform) is not None, pkgs.values()): - if isinstance(platforms.get(platform), list): - packages.extend(platforms.get(platform)) + + def process_package(package, in_release=False): + """ + Processes the given package object that belongs to a platform and adds it to the packages + list variable in the parent scope. 
+ In essence, this method recursively traverses the JSON structure defined in packages.json + and performs the core filtering. + + :param package: The package object to process. + :param in_release: A boolean that indicates whether the current traversal belongs to a package + that needs to be filtered for the given release label. + """ + if isinstance(package, list): + for entry in package: + process_package(entry, in_release) + elif isinstance(package, dict): + if release is None: + return + # A release maps to either a single package name or a list of names; recurse on the value. + process_package(package.get(release, []), in_release=True) + elif isinstance(package, str): + # Filter out the package that doesn't belong to this release, + # if a release label has been specified. + if release is not None and not in_release: + return + packages.append(package) else: - packages.append(platforms.get(platform)) + raise Exception('Unknown package of type: {}'.format(type(package))) + + for platforms in filter(lambda x: x.get(platform) is not None, pkgs.values()): + process_package(platforms.get(platform)) return packages @@ -49,13 +78,21 @@ def get_packages(platform): file=sys.stderr) sys.exit(1) - platform_arg = sys.argv[1] - if not is_supported_platform(platform_arg): + arg_parser = argparse.ArgumentParser( + description='Platform package dependency resolver for building Apache Hadoop') + arg_parser.add_argument('-r', '--release', nargs=1, type=str, + help='The release label to filter the packages for the given platform') + arg_parser.add_argument('platform', nargs=1, type=str, + help='The name of the platform to resolve the dependencies for') + args = arg_parser.parse_args() + + if not is_supported_platform(args.platform[0]): print( 'ERROR: The given platform {} is not supported. 
' 'Please refer to platforms.json for a list of supported platforms'.format( - platform_arg), file=sys.stderr) + args.platform[0]), file=sys.stderr) sys.exit(1) - packages_to_install = get_packages(platform_arg) + packages_to_install = get_packages(args.platform[0], + args.release[0] if args.release is not None else None) print(' '.join(packages_to_install))