From 22d5c9c503d357c7cbb10cf2966fcf39b4efa633 Mon Sep 17 00:00:00 2001 From: zeekling Date: Mon, 26 Sep 2022 22:49:02 +0800 Subject: [PATCH] update --- hadoop/README.md | 3 +- hadoop/build.sh | 1 + hadoop/hadoop/.dockerignore | 14 +++ hadoop/hadoop/CentOS-8-reg.repo | 52 ++++++++++++ hadoop/hadoop/Dockerfile | 99 ++++++++++++++++++++++ hadoop/hadoop/Makefile | 18 ++++ hadoop/hadoop/README.md | 43 ++++++++++ hadoop/hadoop/conf/core-site.xml | 8 ++ hadoop/hadoop/conf/hdfs-site.xml | 16 ++++ hadoop/hadoop/conf/mapred-site.xml | 8 ++ hadoop/hadoop/conf/yarn-site.xml | 20 +++++ hadoop/hadoop/docker-compose.yml | 32 +++++++ hadoop/hadoop/entrypoint.sh | 88 +++++++++++++++++++ hadoop/hadoop/get_versions | 29 +++++++ hadoop/hadoop/hooks/post_build | 132 +++++++++++++++++++++++++++++ hadoop/hadoop/profile.d/hadoop.sh | 3 + hadoop/hadoop/ssh/config | 2 + 17 files changed, 566 insertions(+), 2 deletions(-) create mode 100644 hadoop/hadoop/.dockerignore create mode 100644 hadoop/hadoop/CentOS-8-reg.repo create mode 100644 hadoop/hadoop/Dockerfile create mode 100644 hadoop/hadoop/Makefile create mode 100644 hadoop/hadoop/README.md create mode 100644 hadoop/hadoop/conf/core-site.xml create mode 100644 hadoop/hadoop/conf/hdfs-site.xml create mode 100644 hadoop/hadoop/conf/mapred-site.xml create mode 100644 hadoop/hadoop/conf/yarn-site.xml create mode 100644 hadoop/hadoop/docker-compose.yml create mode 100755 hadoop/hadoop/entrypoint.sh create mode 100755 hadoop/hadoop/get_versions create mode 100755 hadoop/hadoop/hooks/post_build create mode 100644 hadoop/hadoop/profile.d/hadoop.sh create mode 100644 hadoop/hadoop/ssh/config diff --git a/hadoop/README.md b/hadoop/README.md index 50f57c1..d3b29ae 100644 --- a/hadoop/README.md +++ b/hadoop/README.md @@ -6,9 +6,8 @@ sudo ./build.sh ``` hdfs路径: -http://172.17.0.2:50070/explorer.html#/ +http://172.17.0.2:9870/dfshealth.html#tab-overview -http://172.17.0.2:50075/datanode.html yarn路径 http://172.17.0.2:8088/cluster diff --git 
a/hadoop/build.sh b/hadoop/build.sh index 5d197b0..46717a5 100755 --- a/hadoop/build.sh +++ b/hadoop/build.sh @@ -7,5 +7,6 @@ docker stop hadoop docker rm hadoop docker run -dit --name hadoop --privileged=true \ + -v /data/hadoop/dfs:/dfs/data \ -p 8042 -p 8088 -p 19888 -p 50070 -p 50075 zeekling/hadoop diff --git a/hadoop/hadoop/.dockerignore b/hadoop/hadoop/.dockerignore new file mode 100644 index 0000000..9862c1b --- /dev/null +++ b/hadoop/hadoop/.dockerignore @@ -0,0 +1,14 @@ +*.bak +*.md +*.orig +*.swp +*.tmp +.* +.DS_Store +.aws +.env +.gitignore +Dockerfile +LICENSE +Makefile +docker-compose.yml diff --git a/hadoop/hadoop/CentOS-8-reg.repo b/hadoop/hadoop/CentOS-8-reg.repo new file mode 100644 index 0000000..8017061 --- /dev/null +++ b/hadoop/hadoop/CentOS-8-reg.repo @@ -0,0 +1,52 @@ +# CentOS-Base.repo +# +# The mirror system uses the connecting IP address of the client and the +# update status of each mirror to pick mirrors that are updated to and +# geographically close to the client. You should use this for CentOS updates +# unless you are manually picking other mirrors. +# +# If the mirrorlist= does not work for you, as a fall back you can try the +# remarked out baseurl= line instead. 
+# +# + +[BaseOS] +name=CentOS-$releasever - Base - repo.huaweicloud.com +baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/BaseOS/$basearch/os/ +#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=BaseOS&infra=$infra +gpgcheck=1 +gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official + +#released updates +[AppStream] +name=CentOS-$releasever - AppStream - repo.huaweicloud.com +baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/AppStream/$basearch/os/ +#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=AppStream&infra=$infra +gpgcheck=1 +gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official + +[PowerTools] +name=CentOS-$releasever - PowerTools - repo.huaweicloud.com +baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/PowerTools/$basearch/os/ +#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=PowerTools&infra=$infra +gpgcheck=1 +gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official + + +#additional packages that may be useful +[extras] +name=CentOS-$releasever - Extras - repo.huaweicloud.com +baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/extras/$basearch/os/ +#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras +gpgcheck=1 +gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official + + +#additional packages that extend functionality of existing packages +[centosplus] +name=CentOS-$releasever - Plus - repo.huaweicloud.com +baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/centosplus/$basearch/os/ +#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=centosplus +gpgcheck=1 +enabled=0 +gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile new file mode 100644 index 0000000..8528638 --- /dev/null +++ 
b/hadoop/hadoop/Dockerfile @@ -0,0 +1,101 @@ +# +# Author: Hari Sekhon +# Date: 2016-04-24 21:18:57 +0100 (Sun, 24 Apr 2016) +# +# vim:ts=4:sts=4:sw=4:et +# +# https://github.com/HariSekhon/Dockerfiles +# +# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback +# +# https://www.linkedin.com/in/HariSekhon +# + +# nosemgrep: dockerfile.audit.dockerfile-source-not-pinned.dockerfile-source-not-pinned +FROM harisekhon/centos-java:latest + +ARG HADOOP_VERSION=3.3.1 + +LABEL org.opencontainers.image.description="Hadoop" \ + org.opencontainers.image.version="$HADOOP_VERSION" \ + org.opencontainers.image.authors="Hari Sekhon (https://www.linkedin.com/in/HariSekhon)" \ + org.opencontainers.image.url="https://ghcr.io/HariSekhon/hadoop" \ + org.opencontainers.image.documentation="https://hub.docker.com/r/harisekhon/hadoop" \ + org.opencontainers.image.source="https://github.com/HariSekhon/Dockerfiles" + +ARG TAR=hadoop-$HADOOP_VERSION.tar.gz + +ENV PATH=$PATH:/hadoop/bin + +WORKDIR / + +RUN rm -rf /etc/yum.repos.d/*.repo + +COPY ./CentOS-8-reg.repo /etc/yum.repos.d/CentOS-Base.repo + +# clean the yum metadata in the same layer that creates it - deleting it in a later layer does not shrink the image +RUN yum clean all && yum makecache && yum install -y openssh-server openssh-clients tar which && yum clean all && rm -rf /var/cache/yum /var/cache/dnf + +RUN set -eux && \ + yum install -y wget hostname && \ + # --max-redirect - some apache mirrors redirect a couple times and give you the latest version instead + # but this breaks stuff later because the link will not point to the right dir + # (and is also the wrong version for the tag) +# wget -t 10 --max-redirect 1 --retry-connrefused -O "$TAR" "http://www.apache.org/dyn/closer.lua?filename=hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-$HADOOP_VERSION.tar.gz&action=download" || \ + wget -t 10 --max-redirect 1 --retry-connrefused -O "$TAR" "http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-$HADOOP_VERSION.tar.gz" && \ + tar zxf "$TAR" && \ + # check tarball was extracted to the right place, helps ensure it's the 
right version and the link will work + test -d "hadoop-$HADOOP_VERSION" && \ + ln -sv "hadoop-$HADOOP_VERSION" hadoop && \ + mkdir /etc/hadoop && \ + ln -s /hadoop/etc/hadoop /etc/hadoop/conf && \ + rm -fv "$TAR" && \ + { rm -rf hadoop/share/doc; : ; } && \ + yum autoremove -y && \ + # gets autoremoved, ensure it's added back as Hadoop scripts need it + yum install -y hostname && \ + yum clean all && \ + rm -rf /var/cache/yum + +COPY entrypoint.sh / +COPY conf/core-site.xml /hadoop/etc/hadoop/ +COPY conf/hdfs-site.xml /hadoop/etc/hadoop/ +COPY conf/yarn-site.xml /hadoop/etc/hadoop/ +COPY conf/mapred-site.xml /hadoop/etc/hadoop/ +COPY profile.d/hadoop.sh /etc/profile.d/ +COPY ssh/config /root/.ssh/ + +RUN set -eux && \ + # Hadoop 1.x + #/hadoop/bin/hadoop namenode -format && \ + # Hadoop 2.x + /hadoop/bin/hdfs namenode -format && \ + groupadd hadoop && \ + useradd -g hadoop hdfs && \ + useradd -g hadoop yarn && \ + # the DataNode runs as hdfs (HDFS_DATANODE_USER) and writes to /dfs/data (dfs.datanode.data.dir), so create it and hand it to hdfs as well + mkdir -p /dfs/name /dfs/data && \ + mkdir -p /hadoop/logs && \ + chown -R hdfs:hadoop /dfs/name /dfs/data && \ + chgrp -R hadoop /hadoop/logs && \ + chmod -R 0770 /hadoop/logs && \ + mkdir -p /root/.ssh \ + /home/hdfs/.ssh \ + /home/yarn/.ssh && \ + chown hdfs /home/hdfs/.ssh && \ + chown yarn /home/yarn/.ssh && \ + chmod 0700 /root/.ssh \ + /home/hdfs/.ssh \ + /home/yarn/.ssh + +ENV HDFS_NAMENODE_USER=hdfs +ENV HDFS_SECONDARYNAMENODE_USER=hdfs +ENV HDFS_DATANODE_USER=hdfs +ENV YARN_RESOURCEMANAGER_USER=yarn +ENV YARN_NODEMANAGER_USER=yarn + +#EXPOSE 8020 8042 8088 9000 10020 19888 50010 50020 50070 50075 50090 +# Hadoop 3.0 changed ports :-( - DataNode is now 9864 (HTTP) / 9866 (data transfer) / 9867 (IPC), the legacy 500xx ports are kept for backwards compatibility +EXPOSE 8020 8042 8088 9000 9864 9866 9867 9868 9870 10020 19888 50010 50020 50090 + +CMD ["/entrypoint.sh"] diff --git a/hadoop/hadoop/Makefile b/hadoop/hadoop/Makefile new file mode 100644 index 0000000..334522d --- /dev/null +++ b/hadoop/hadoop/Makefile @@ -0,0 +1,18 @@ +# +# Author: Hari Sekhon +# Date: 2016-04-24 21:32:37 +0100 (Sun, 24 Apr 2016) +# +# vim:ts=4:sts=4:sw=4:noet +# +# https://github.com/HariSekhon/Dockerfiles +# +# If you're using 
my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help improve or steer this or other code I publish +# +# https://www.linkedin.com/in/HariSekhon +# + +REPO := harisekhon/hadoop + +include ../Makefile.in + +MAP_PORTS := 8042 8088 19888 50010 50020 50070 50075 50090 diff --git a/hadoop/hadoop/README.md b/hadoop/hadoop/README.md new file mode 100644 index 0000000..e7d2987 --- /dev/null +++ b/hadoop/hadoop/README.md @@ -0,0 +1,43 @@ +# Apache Hadoop + +[![DockerHub Hadoop](https://img.shields.io/badge/DockerHub-harisekhon%2Fhadoop-blue)](https://hub.docker.com/repository/docker/harisekhon/hadoop) + +https://hadoop.apache.org/ + +Big Data Distributed Storage and Compute Software + +- Yarn - Distributed Processing Framework for running MapReduce, Spark and other application frameworks +- HDFS - Distributed Storage + +By default this starts a pseudo-distributed cluster of 4 daemons in a single container: + +- Yarn + - ResourceManager - Cluster Processing Master (submit jobs here) + - NodeManager - Cluster Processing Worker +- HDFS + - NameNode - Filesystem Master + - DataNode - Filesystem Worker + +Perfect for development and testing. Recommended to use Docker with 4GB+ RAM for this pseudo-cluster container. + +For real scaling just start a single daemon in each container for a fully distributed setup. + + +To run the all-in-one-container cluster and expose all the UIs for NodeManager, ResourceManager, NameNode and DataNode respectively, do: +``` +docker run -ti -p 8042 -p 8088 -p 19888 -p 9870 -p 9864 harisekhon/hadoop +``` + +or with docker-compose: + +``` +docker-compose up +``` + +or without `docker-compose`, a shortcut for the docker run command: + +``` +make run +``` + +Related Docker images can be found for many Open Source, Big Data and NoSQL technologies on [my DockerHub profile](https://hub.docker.com/r/harisekhon). 
The source for them all can be found in the [master Dockerfiles GitHub repo](https://github.com/HariSekhon/Dockerfiles/). diff --git a/hadoop/hadoop/conf/core-site.xml b/hadoop/hadoop/conf/core-site.xml new file mode 100644 index 0000000..bb5a4cd --- /dev/null +++ b/hadoop/hadoop/conf/core-site.xml @@ -0,0 +1,8 @@ + + + + + fs.defaultFS + hdfs://localhost:8020 + + diff --git a/hadoop/hadoop/conf/hdfs-site.xml b/hadoop/hadoop/conf/hdfs-site.xml new file mode 100644 index 0000000..c8411de --- /dev/null +++ b/hadoop/hadoop/conf/hdfs-site.xml @@ -0,0 +1,16 @@ + + + + + dfs.replication + 1 + + + dfs.namenode.name.dir + /dfs/name + + + dfs.datanode.data.dir + /dfs/data + + diff --git a/hadoop/hadoop/conf/mapred-site.xml b/hadoop/hadoop/conf/mapred-site.xml new file mode 100644 index 0000000..a9d0127 --- /dev/null +++ b/hadoop/hadoop/conf/mapred-site.xml @@ -0,0 +1,8 @@ + + + + + mapreduce.framework.name + yarn + + diff --git a/hadoop/hadoop/conf/yarn-site.xml b/hadoop/hadoop/conf/yarn-site.xml new file mode 100644 index 0000000..03d95da --- /dev/null +++ b/hadoop/hadoop/conf/yarn-site.xml @@ -0,0 +1,20 @@ + + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage + 100 + + + yarn.nodemanager.resource.cpu-vcores + 1 + + + + yarn.nodemanager.resource.memory-mb + 3072 + + diff --git a/hadoop/hadoop/docker-compose.yml b/hadoop/hadoop/docker-compose.yml new file mode 100644 index 0000000..21702dc --- /dev/null +++ b/hadoop/hadoop/docker-compose.yml @@ -0,0 +1,32 @@ +# vim:ts=2:sts=2:sw=2:et +# +# Author: Hari Sekhon +# Date: 2016-12-09 21:25:07 +0000 (Fri, 09 Dec 2016) +# +# https://github.com/HariSekhon/Dockerfiles +# +# License: see accompanying Hari Sekhon LICENSE file +# +# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish +# +# https://www.linkedin.com/in/HariSekhon +# + +version: '3' +services: 
+ hadoop: + image: harisekhon/hadoop:${VERSION:-latest} + ports: + #- 8020:8020 + - 8042:8042 + - 8088:8088 + #- 9000:9000 + - 9864:9864 + - 9870:9870 + #- 10020:10020 + - 19888:19888 + - 50010:50010 + - 50020:50020 + - 50070:50070 + - 50075:50075 + - 50090:50090 diff --git a/hadoop/hadoop/entrypoint.sh b/hadoop/hadoop/entrypoint.sh new file mode 100755 index 0000000..3f90aa2 --- /dev/null +++ b/hadoop/hadoop/entrypoint.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# vim:ts=4:sts=4:sw=4:et +# +# Author: Hari Sekhon +# Date: 2016-04-24 21:29:46 +0100 (Sun, 24 Apr 2016) +# +# https://github.com/HariSekhon/Dockerfiles +# +# License: see accompanying Hari Sekhon LICENSE file +# +# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback +# +# https://www.linkedin.com/in/HariSekhon +# + +set -euo pipefail +[ -n "${DEBUG:-}" ] && set -x + +export JAVA_HOME="${JAVA_HOME:-/usr}" + +export PATH="$PATH:/hadoop/sbin:/hadoop/bin" + +if [ $# -gt 0 ]; then + exec "$@" +else + for x in root hdfs yarn; do + # look up the real home directory - root's is /root but hdfs and yarn live under /home, so "/$x" is only right for root + home_dir="$(getent passwd "$x" | cut -d: -f6)" + if ! [ -f "$home_dir/.ssh/id_rsa" ]; then + su - "$x" <<-EOF + [ -n "${DEBUG:-}" ] && set -x + ssh-keygen -t rsa -f ~/.ssh/id_rsa -N "" +EOF + fi + if ! [ -f "$home_dir/.ssh/authorized_keys" ]; then + su - "$x" <<-EOF + [ -n "${DEBUG:-}" ] && set -x + cp -rfv ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys + chmod -v 0400 ~/.ssh/authorized_keys +EOF + fi + done + + # removed in newer versions of CentOS + if ! [ -f /etc/ssh/ssh_host_rsa_key ] && [ -x /usr/sbin/sshd-keygen ]; then + /usr/sbin/sshd-keygen || : + fi + if ! [ -f /etc/ssh/ssh_host_rsa_key ]; then + ssh-keygen -q -t rsa -f /etc/ssh/ssh_host_rsa_key -C '' -N '' + chmod 0600 /etc/ssh/ssh_host_rsa_key + chmod 0644 /etc/ssh/ssh_host_rsa_key.pub + fi + + if ! 
pgrep -x sshd &>/dev/null; then + /usr/sbin/sshd + fi + echo + SECONDS=0 + while true; do + if ssh-keyscan localhost 2>&1 | grep -q OpenSSH; then + echo "SSH is ready to rock" + break + fi + if [ "$SECONDS" -gt 20 ]; then + echo "FAILED: SSH failed to come up after 20 secs" + exit 1 + fi + echo "waiting for SSH to come up" + sleep 1 + done + echo + if ! [ -f /root/.ssh/known_hosts ]; then + ssh-keyscan localhost || : + ssh-keyscan 0.0.0.0 || : + fi | tee -a /root/.ssh/known_hosts + hostname="$(hostname -f)" + if ! grep -q "$hostname" /root/.ssh/known_hosts; then + ssh-keyscan "$hostname" || : + fi | tee -a /root/.ssh/known_hosts + + mkdir -pv /hadoop/logs + + sed -i "s/localhost/$hostname/" /hadoop/etc/hadoop/core-site.xml + # -f because /run/nologin may already be gone (e.g. when the container is restarted) and a plain rm would abort the script under set -e + rm -f /run/nologin + start-dfs.sh + start-yarn.sh + tail -f /dev/null /hadoop/logs/* + stop-yarn.sh + stop-dfs.sh +fi diff --git a/hadoop/hadoop/get_versions b/hadoop/hadoop/get_versions new file mode 100755 index 0000000..10a44e7 --- /dev/null +++ b/hadoop/hadoop/get_versions @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# vim:ts=4:sts=4:sw=4:et +# +# Author: Hari Sekhon +# Date: 2018-10-07 20:46:38 +0100 (Sun, 07 Oct 2018) +# +# https://github.com/HariSekhon/Dockerfiles +# +# License: see accompanying Hari Sekhon LICENSE file +# +# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish +# +# https://www.linkedin.com/in/HariSekhon +# + +# Get list of versions to be used by tests/check_for_new_version + +set -euo pipefail +[ -n "${DEBUG:-}" ] && set -x + +get_versions(){ + curl -sS http://archive.apache.org/dist/hadoop/common/ | + # no grep -P on Mac, requires GNU grep :-/ + egrep -i -o 'href="hadoop-[[:digit:]]+(\.[[:digit:]]+)+(-[[:alpha:]]+)?' 
| + grep -v -e alpha -e beta | + sed 's/href="hadoop-//' +} + +get_versions diff --git a/hadoop/hadoop/hooks/post_build b/hadoop/hadoop/hooks/post_build new file mode 100755 index 0000000..30a45e1 --- /dev/null +++ b/hadoop/hadoop/hooks/post_build @@ -0,0 +1,132 @@ +#!/usr/bin/env bash +# vim:ts=4:sts=4:sw=4:et +# +# Author: Hari Sekhon +# Date: 2016-12-17 19:47:36 +0000 (Sat, 17 Dec 2016) +# +# https://github.com/HariSekhon/Dockerfiles +# +# License: see accompanying Hari Sekhon LICENSE file +# +# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish +# +# https://www.linkedin.com/in/HariSekhon +# + +set -eu +[ -n "${DEBUG:-}" ] && set -x +srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +highest_tag="unknown" + +current_scala="2.11" + +if [ -z "${IMAGE_NAME:-}" ]; then + echo "IMAGE_NAME not set, determining from Makefile" + IMAGE_NAME="$(awk -F= '/^[[:space:]]*REPO[[:space:]]*:*=/{print $2}' "$srcdir/../Makefile" | sed 's/[[:space:]]//g;s/"//g')" + if [ -z "$IMAGE_NAME" ]; then + echo "FAILED to determine IMAGE_NAME From Makefile" + exit 1 + fi + echo "determined image name from Makefile to be '$IMAGE_NAME'" +fi +DOCKER_REPO="${DOCKER_REPO:-${IMAGE_NAME/:*}}" + +if [ -z "${SOURCE_BRANCH:-}" ]; then + echo "SOURCE_BRANCH not set, determining from git" + SOURCE_BRANCH="$(git branch | sed -n 's/*[[:space:]]*//p')" + echo "determined source branch from git to be '$SOURCE_BRANCH'" +fi +# readlink is not portable across Linux + Mac +docker_branch_base="$(basename "$(cd "$(dirname "${BASH_SOURCE[0]}")/.."; echo "$PWD")")" +if ! git branch -a | grep -q "$docker_branch_base"; then + docker_branch_base="${docker_branch_base%-dev}" + docker_branch_base="${docker_branch_base%-cluster}" + if ! 
git branch -a | grep -q "$docker_branch_base"; then + docker_branch_base="${docker_branch_base%-*}" + fi + # solrcloud -> solr + docker_branch_base="${docker_branch_base%cloud}" + if ! git branch -a | grep -q "$docker_branch_base"; then + if [ "$SOURCE_BRANCH" = "master" ]; then + echo "WARNING: cannot determine if docker branch base '$docker_branch_base' is correct while running on master branch (DockerHub only checks out master branch)" + else + echo "ERROR: failed to determine docker branch base even after normalization rules, tried '$docker_branch_base'" + echo + echo "These are the branches found:" + echo + git branch -a + exit 1 + fi + fi +fi +echo "determined docker branch base = $docker_branch_base" +docker_version_base="${docker_branch_base//-/_}" +echo "will search Dockerfile using docker version base = $docker_version_base" +if [ "$docker_branch_base" = "kafka" ]; then + highest_tag="${current_scala}_0.9" + echo "Kafka detected, overriding highest tag to be $highest_tag" +else + # This depends on the Dockerfiles content and version ordering being correct, but this also causes a lot of merge conflicts when merging improvements from master so not storing all previous versions commented out any more as it's more of an inconvenience + #highest_tag="$(grep -i ARG.*${docker_version_base}_VERSION= Dockerfile | sed -n '$s/.*=//p' | awk -F. '{print $1"."$2}')" + # use Git branch versions as the source of truth + branches="$(git branch -a | grep -i -e "[[:space:]]$docker_branch_base\\>" -e "remotes/.*/$docker_branch_base\\>" || :)" + if [ -n "$branches" ]; then + #if [ "$(wc -l <<< "$branches")" -ge 2 ]; then + highest_tag="$(sed 's/.*\/// ; s/.*-\([[:digit:]]\)/\1/' <<< "$branches" | sed 's/\./ /g' | sort -n -k1 -k2 | sed 's/ /./g' | tail -n 1)" + echo "determined highest tag from git branches to be '$highest_tag'" + #else + # echo "WARNING: less than 2 matching branches found, cannot determine the highest tag" + #fi + fi +fi + +current_tag="" +if ! 
grep ':' <<< "$IMAGE_NAME"; then + echo "current tag not found in IMAGE_NAME=$IMAGE_NAME, determining from Dockerfile" + current_tag="$(grep -i "^[[:space:]]*ARG.*${docker_version_base}.*_VERSION=" "$srcdir/../Dockerfile" | sed -n '$s/.*=//;s/[[:space:]]*//g;s/"//g;p' || :)" + if [ -z "$current_tag" ]; then + echo "ERROR: failed to determine current tag from Dockerfile!" + exit 1 + fi + if [ "$current_tag" != "latest" ]; then + current_tag="$(awk -F. '{print $1"."$2}' <<< "$current_tag")" + fi + if [ "$docker_branch_base" = "kafka" ]; then + current_tag="${current_scala}_$current_tag" + fi + echo "current tag determined from Dockerfile to be '$current_tag'" + IMAGE_NAME="$IMAGE_NAME:$current_tag" +fi +if [ -z "$current_tag" ]; then + echo "WARNING: failed to determine current tag - IMAGE_NAME '$IMAGE_NAME' did not contain version?" + exit 0 +fi + +is_latest(){ + if [ "$SOURCE_BRANCH" = "master" ]; then + echo "determined to be latest build, due to running on master branch" + return 0 + elif [ "$SOURCE_BRANCH" = "$docker_branch_base-$highest_tag" ]; then + echo "determined to be highest tagged build, due to SOURCE_BRANCH = '$SOURCE_BRANCH' == \$docker_branch_base-\$highest_tag = '$docker_branch_base-$highest_tag'" + return 0 + fi + echo "$SOURCE_BRANCH is not the latest build, which is $docker_branch_base-$highest_tag" + return 1 +} + +if is_latest; then + if [ "$current_tag" = "latest" ] && + [ -x "$srcdir/../get_latest_version.sh" ]; then + newest_version="$("$srcdir/../get_latest_version.sh")" + echo "docker tag \"$IMAGE_NAME\" \"$DOCKER_REPO:$newest_version\"" + docker tag "$IMAGE_NAME" "$DOCKER_REPO:$newest_version" + echo "docker push \"$DOCKER_REPO:$newest_version\"" + docker push "$DOCKER_REPO:$newest_version" + else + echo "docker tag \"$IMAGE_NAME\" \"$DOCKER_REPO:latest\"" + docker tag "$IMAGE_NAME" "$DOCKER_REPO:latest" + echo "docker push \"$DOCKER_REPO:latest\"" + docker push "$DOCKER_REPO:latest" + fi +fi diff --git a/hadoop/hadoop/profile.d/hadoop.sh 
b/hadoop/hadoop/profile.d/hadoop.sh new file mode 100644 index 0000000..e122544 --- /dev/null +++ b/hadoop/hadoop/profile.d/hadoop.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +export HADOOP_HOME=/hadoop diff --git a/hadoop/hadoop/ssh/config b/hadoop/hadoop/ssh/config new file mode 100644 index 0000000..f30d239 --- /dev/null +++ b/hadoop/hadoop/ssh/config @@ -0,0 +1,2 @@ +Host * + StrictHostKeyChecking no