update

This commit is contained in:
parent f954c53980
commit 22d5c9c503
@@ -6,9 +6,8 @@ sudo ./build.sh
 ```
 
 HDFS path:
-http://172.17.0.2:50070/explorer.html#/
+http://172.17.0.2:9870/dfshealth.html#tab-overview
 
-http://172.17.0.2:50075/datanode.html
 
 YARN path
 http://172.17.0.2:8088/cluster
@@ -7,5 +7,6 @@ docker stop hadoop
 docker rm hadoop
 
 docker run -dit --name hadoop --privileged=true \
+-v /data/hadoop/dfs:/dfs/data \
 -p 8042 -p 8088 -p 19888 -p 50070 -p 50075 zeekling/hadoop
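These two hunks update the documented web UIs for the Hadoop 3 port change (the NameNode UI moved from 50070 to 9870, and the old 50075 DataNode page link is dropped) and persist DataNode data on the host. A minimal sketch for checking the UIs once the relevant ports are reachable; the 172.17.0.2 address is taken from the URLs above and the 9864 DataNode port from the docker-compose.yml added below, both may differ in your environment:

```
# check the Hadoop 3 web UIs from the Docker host (illustrative; adjust the container IP)
curl -sf http://172.17.0.2:9870/ >/dev/null && echo "NameNode UI up (9870)"
curl -sf http://172.17.0.2:9864/ >/dev/null && echo "DataNode UI up (9864)"
curl -sf http://172.17.0.2:8088/cluster >/dev/null && echo "YARN ResourceManager UI up (8088)"
```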
hadoop/hadoop/.dockerignore (new file, 14 lines)
*.bak
*.md
*.orig
*.swp
*.tmp
.*
.DS_Store
.aws
.env
.gitignore
Dockerfile
LICENSE
Makefile
docker-compose.yml
hadoop/hadoop/CentOS-8-reg.repo (new file, 52 lines)
# CentOS-Base.repo
#
# The mirror system uses the connecting IP address of the client and the
# update status of each mirror to pick mirrors that are updated to and
# geographically close to the client. You should use this for CentOS updates
# unless you are manually picking other mirrors.
#
# If the mirrorlist= does not work for you, as a fall back you can try the
# remarked out baseurl= line instead.
#
#

[BaseOS]
name=CentOS-$releasever - Base - repo.huaweicloud.com
baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/BaseOS/$basearch/os/
#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=BaseOS&infra=$infra
gpgcheck=1
gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official

#released updates
[AppStream]
name=CentOS-$releasever - AppStream - repo.huaweicloud.com
baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/AppStream/$basearch/os/
#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=AppStream&infra=$infra
gpgcheck=1
gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official

[PowerTools]
name=CentOS-$releasever - PowerTools - repo.huaweicloud.com
baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/PowerTools/$basearch/os/
#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=PowerTools&infra=$infra
gpgcheck=1
gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official


#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras - repo.huaweicloud.com
baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/extras/$basearch/os/
#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras
gpgcheck=1
gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official


#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus - repo.huaweicloud.com
baseurl=https://repo.huaweicloud.com/centos-vault/8.5.2111/centosplus/$basearch/os/
#mirrorlist=https://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=centosplus
gpgcheck=1
enabled=0
gpgkey=https://repo.huaweicloud.com/centos/RPM-GPG-KEY-CentOS-Official
hadoop/hadoop/Dockerfile (new file, 99 lines)
#
# Author: Hari Sekhon
# Date: 2016-04-24 21:18:57 +0100 (Sun, 24 Apr 2016)
#
# vim:ts=4:sts=4:sw=4:et
#
# https://github.com/HariSekhon/Dockerfiles
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback
#
# https://www.linkedin.com/in/HariSekhon
#

# nosemgrep: dockerfile.audit.dockerfile-source-not-pinned.dockerfile-source-not-pinned
FROM harisekhon/centos-java:latest

ARG HADOOP_VERSION=3.3.1

LABEL org.opencontainers.image.description="Hadoop" \
      org.opencontainers.image.version="$HADOOP_VERSION" \
      org.opencontainers.image.authors="Hari Sekhon (https://www.linkedin.com/in/HariSekhon)" \
      org.opencontainers.image.url="https://ghcr.io/HariSekhon/hadoop" \
      org.opencontainers.image.documentation="https://hub.docker.com/r/harisekhon/hadoop" \
      org.opencontainers.image.source="https://github.com/HariSekhon/Dockerfiles"

ARG TAR=hadoop-$HADOOP_VERSION.tar.gz

ENV PATH $PATH:/hadoop/bin

WORKDIR /

RUN rm -rf /etc/yum.repos.d/*.repo

COPY ./CentOS-8-reg.repo /etc/yum.repos.d/CentOS-Base.repo

RUN yum clean all && yum makecache && yum install -y openssh-server openssh-clients tar which

RUN set -eux && \
    yum install -y wget hostname && \
    # --max-redirect - some apache mirrors redirect a couple times and give you the latest version instead
    # but this breaks stuff later because the link will not point to the right dir
    # (and is also the wrong version for the tag)
    # wget -t 10 --max-redirect 1 --retry-connrefused -O "$TAR" "http://www.apache.org/dyn/closer.lua?filename=hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-$HADOOP_VERSION.tar.gz&action=download" || \
    wget -t 10 --max-redirect 1 --retry-connrefused -O "$TAR" "http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-$HADOOP_VERSION.tar.gz" && \
    tar zxf "$TAR" && \
    # check tarball was extracted to the right place, helps ensure it's the right version and the link will work
    test -d "hadoop-$HADOOP_VERSION" && \
    ln -sv "hadoop-$HADOOP_VERSION" hadoop && \
    mkdir /etc/hadoop && \
    ln -s /hadoop/etc/hadoop /etc/hadoop/conf && \
    rm -fv "$TAR" && \
    { rm -rf hadoop/share/doc; : ; } && \
    yum autoremove -y && \
    # gets autoremoved, ensure it's added back as Hadoop scripts need it
    yum install -y hostname && \
    yum clean all && \
    rm -rf /var/cache/yum

COPY entrypoint.sh /
COPY conf/core-site.xml /hadoop/etc/hadoop/
COPY conf/hdfs-site.xml /hadoop/etc/hadoop/
COPY conf/yarn-site.xml /hadoop/etc/hadoop/
COPY conf/mapred-site.xml /hadoop/etc/hadoop/
COPY profile.d/hadoop.sh /etc/profile.d/
COPY ssh/config /root/.ssh/

RUN set -eux && \
    # Hadoop 1.x
    #/hadoop/bin/hadoop namenode -format && \
    # Hadoop 2.x
    /hadoop/bin/hdfs namenode -format && \
    groupadd hadoop && \
    useradd -g hadoop hdfs && \
    useradd -g hadoop yarn && \
    mkdir -p /dfs/name && \
    mkdir -p /hadoop/logs && \
    chown -R hdfs:hadoop /dfs/name && \
    chgrp -R hadoop /hadoop/logs && \
    chmod -R 0770 /hadoop/logs && \
    mkdir -p /root/.ssh \
             /home/hdfs/.ssh \
             /home/yarn/.ssh && \
    chown hdfs /home/hdfs/.ssh && \
    chown yarn /home/yarn/.ssh && \
    chmod 0700 /root/.ssh \
               /home/hdfs/.ssh \
               /home/yarn/.ssh

ENV HDFS_NAMENODE_USER=hdfs
ENV HDFS_SECONDARYNAMENODE_USER=hdfs
ENV HDFS_DATANODE_USER=hdfs
ENV YARN_RESOURCEMANAGER_USER=yarn
ENV YARN_NODEMANAGER_USER=yarn

#EXPOSE 8020 8042 8088 9000 10020 19888 50010 50020 50070 50075 50090
# Hadoop 3.0 changed ports :-(
EXPOSE 8020 8042 8088 9000 9868 9870 10020 19888 50010 50020 50090

CMD ["/entrypoint.sh"]
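The Dockerfile pins the Hadoop release through the HADOOP_VERSION build argument, so a local build can override it without editing the file. A minimal sketch (the hadoop-local image tag is illustrative, and other versions only work if the configured Tsinghua mirror still hosts them):

```
# build from the directory added in this commit; --build-arg overrides ARG HADOOP_VERSION
docker build -t hadoop-local --build-arg HADOOP_VERSION=3.3.1 hadoop/hadoop/
```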
hadoop/hadoop/Makefile (new file, 18 lines)
#
# Author: Hari Sekhon
# Date: 2016-04-24 21:32:37 +0100 (Sun, 24 Apr 2016)
#
# vim:ts=4:sts=4:sw=4:noet
#
# https://github.com/HariSekhon/Dockerfiles
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help improve or steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#

REPO := harisekhon/hadoop

include ../Makefile.in

MAP_PORTS := 8042 8088 19888 50010 50020 50070 50075 50090
hadoop/hadoop/README.md (new file, 43 lines)
# Apache Hadoop

[![DockerHub Hadoop](https://img.shields.io/badge/DockerHub-harisekhon%2Fhadoop-blue)](https://hub.docker.com/repository/docker/harisekhon/hadoop)

https://hadoop.apache.org/

Big Data Distributed Storage and Compute Software

- Yarn - Distributed Processing Framework for running MapReduce, Spark and other application frameworks
- HDFS - Distributed Storage

By default starts a pseudo-distributed cluster of 4 daemons in a single container:

- Yarn
  - ResourceManager - Cluster Processing Master (submit jobs here)
  - NodeManager - Cluster Processing Worker
- HDFS
  - NameNode - Filesystem Master
  - DataNode - Filesystem Worker

Perfect for development and testing. Recommended to use Docker with 4GB+ RAM for this pseudo-cluster container.

For real scaling just start a single daemon in each container for fully distributed setup.

To run the all-in-one-container cluster and expose all the UIs for NodeManager, ResourceManager, NameNode and DataNode respectively, do:

```
docker run -ti -p 8042 -p 8088 -p 19888 -p 50070 -p 50075 harisekhon/hadoop
```

or with docker-compose:

```
docker-compose up
```

or without `docker-compose`, a shortcut for the docker run command:

```
make run
```

Related Docker images can be found for many Open Source, Big Data and NoSQL technologies on [my DockerHub profile](https://hub.docker.com/r/harisekhon). The source for them all can be found in the [master Dockerfiles GitHub repo](https://github.com/HariSekhon/Dockerfiles/).
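The README mentions running a single daemon per container for a fully distributed setup but does not show it. A minimal sketch of that idea, relying on the entrypoint's exec "$@" branch: the container names and user-defined network are hypothetical, and each container would additionally need core-site.xml/hdfs-site.xml pointed at the NameNode host rather than localhost, which this commit's configs do not do.

```
# hypothetical single-daemon-per-container layout (not part of this commit)
docker network create hadoop-net
docker run -d --name namenode        --network hadoop-net harisekhon/hadoop hdfs namenode
docker run -d --name datanode        --network hadoop-net harisekhon/hadoop hdfs datanode
docker run -d --name resourcemanager --network hadoop-net harisekhon/hadoop yarn resourcemanager
docker run -d --name nodemanager     --network hadoop-net harisekhon/hadoop yarn nodemanager
```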
hadoop/hadoop/conf/core-site.xml (new file, 8 lines)
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:8020</value>
  </property>
</configuration>
hadoop/hadoop/conf/hdfs-site.xml (new file, 16 lines)
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/dfs/data</value>
  </property>
</configuration>
hadoop/hadoop/conf/mapred-site.xml (new file, 8 lines)
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
hadoop/hadoop/conf/yarn-site.xml (new file, 20 lines)
<?xml version="1.0"?>
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>1</value>
  </property>
  <!-- needs to be set to 3GB in order for MapReduce MonteCarlo default job to succeed, see tests/test_hadoop.sh in Advanced Nagios Plugins Collection which does extensive tests against harisekhon/hadoop-dev image -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>3072</value>
  </property>
</configuration>
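The comment in yarn-site.xml refers to the MapReduce Monte Carlo (pi) job that the 3072 MB NodeManager memory is sized for. A hedged way to exercise it inside the running container; the examples jar path follows the standard Hadoop tarball layout under /hadoop and is assumed, not shown in this commit:

```
# run the Monte Carlo pi estimator (2 maps, 10 samples each) against the pseudo-cluster
hadoop jar /hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi 2 10
```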
hadoop/hadoop/docker-compose.yml (new file, 32 lines)
# vim:ts=2:sts=2:sw=2:et
#
# Author: Hari Sekhon
# Date: 2016-12-09 21:25:07 +0000 (Fri, 09 Dec 2016)
#
# https://github.com/HariSekhon/Dockerfiles
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#

version: '3'
services:
  hadoop:
    image: harisekhon/hadoop:${VERSION:-latest}
    ports:
      #- 8020:8020
      - 8042:8042
      - 8088:8088
      #- 9000:9000
      - 9864:9864
      - 9870:9870
      #- 10020:10020
      - 19888:19888
      - 50010:50010
      - 50020:50020
      - 50070:50070
      - 50075:50075
      - 50090:50090
hadoop/hadoop/entrypoint.sh (new executable file, 88 lines)
#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2016-04-24 21:29:46 +0100 (Sun, 24 Apr 2016)
#
# https://github.com/HariSekhon/Dockerfiles
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback
#
# https://www.linkedin.com/in/HariSekhon
#

set -euo pipefail
[ -n "${DEBUG:-}" ] && set -x

export JAVA_HOME="${JAVA_HOME:-/usr}"

export PATH="$PATH:/hadoop/sbin:/hadoop/bin"

if [ $# -gt 0 ]; then
    exec "$@"
else
    for x in root hdfs yarn; do
        if ! [ -f "/$x/.ssh/id_rsa" ]; then
            su - "$x" <<-EOF
                [ -n "${DEBUG:-}" ] && set -x
                ssh-keygen -t rsa -f ~/.ssh/id_rsa -N ""
EOF
        fi
        if ! [ -f "$x/.ssh/authorized_keys" ]; then
            su - "$x" <<-EOF
                [ -n "${DEBUG:-}" ] && set -x
                cp -rfv ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
                chmod -v 0400 ~/.ssh/authorized_keys
EOF
        fi
    done

    # removed in newer versions of CentOS
    if ! [ -f /etc/ssh/ssh_host_rsa_key ] && [ -x /usr/sbin/sshd-keygen ]; then
        /usr/sbin/sshd-keygen || :
    fi
    if ! [ -f /etc/ssh/ssh_host_rsa_key ]; then
        ssh-keygen -q -t rsa -f /etc/ssh/ssh_host_rsa_key -C '' -N ''
        chmod 0600 /etc/ssh/ssh_host_rsa_key
        chmod 0644 /etc/ssh/ssh_host_rsa_key.pub
    fi

    if ! pgrep -x sshd &>/dev/null; then
        /usr/sbin/sshd
    fi
    echo
    SECONDS=0
    while true; do
        if ssh-keyscan localhost 2>&1 | grep -q OpenSSH; then
            echo "SSH is ready to rock"
            break
        fi
        if [ "$SECONDS" -gt 20 ]; then
            echo "FAILED: SSH failed to come up after 20 secs"
            exit 1
        fi
        echo "waiting for SSH to come up"
        sleep 1
    done
    echo
    if ! [ -f /root/.ssh/known_hosts ]; then
        ssh-keyscan localhost || :
        ssh-keyscan 0.0.0.0 || :
    fi | tee -a /root/.ssh/known_hosts
    hostname="$(hostname -f)"
    if ! grep -q "$hostname" /root/.ssh/known_hosts; then
        ssh-keyscan "$hostname" || :
    fi | tee -a /root/.ssh/known_hosts

    mkdir -pv /hadoop/logs

    sed -i "s/localhost/$hostname/" /hadoop/etc/hadoop/core-site.xml
    rm /run/nologin
    start-dfs.sh
    start-yarn.sh
    tail -f /dev/null /hadoop/logs/*
    stop-yarn.sh
    stop-dfs.sh
fi
hadoop/hadoop/get_versions (new executable file, 29 lines)
#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2018-10-07 20:46:38 +0100 (Sun, 07 Oct 2018)
#
# https://github.com/HariSekhon/Dockerfiles
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#

# Get list of versions to be used by tests/check_for_new_version

set -euo pipefail
[ -n "${DEBUG:-}" ] && set -x

get_versions(){
    curl -sS http://archive.apache.org/dist/hadoop/common/ |
    # no grep -P on Mac, requires GNU grep :-/
    egrep -i -o 'href="hadoop-[[:digit:]]+(\.[[:digit:]]+)+(-[[:alpha:]]+)?' |
    grep -v -e alpha -e beta |
    sed 's/href="hadoop-//'
}

get_versions
hadoop/hadoop/hooks/post_build (new executable file, 132 lines)
#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2016-12-17 19:47:36 +0000 (Sat, 17 Dec 2016)
#
# https://github.com/HariSekhon/Dockerfiles
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#

set -eu
[ -n "${DEBUG:-}" ] && set -x
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

highest_tag="unknown"

current_scala="2.11"

if [ -z "${IMAGE_NAME:-}" ]; then
    echo "IMAGE_NAME not set, determining from Makefile"
    IMAGE_NAME="$(awk -F= '/^[[:space:]]*REPO[[:space:]]*:*=/{print $2}' "$srcdir/../Makefile" | sed 's/[[:space:]]//g;s/"//g')"
    if [ -z "$IMAGE_NAME" ]; then
        echo "FAILED to determine IMAGE_NAME From Makefile"
        exit 1
    fi
    echo "determined image name from Makefile to be '$IMAGE_NAME'"
fi
DOCKER_REPO="${DOCKER_REPO:-${IMAGE_NAME/:*}}"

if [ -z "${SOURCE_BRANCH:-}" ]; then
    echo "SOURCE_BRANCH not set, determining from Makefile"
    SOURCE_BRANCH="$(git branch | sed -n 's/*[[:space:]]*//p')"
    echo "determined source branch from git to be '$SOURCE_BRANCH'"
fi
# readlink is not portable across Linux + Mac
docker_branch_base="$(basename "$(cd "$(dirname "${BASH_SOURCE[0]}")/.."; echo "$PWD")")"
if ! git branch -a | grep -q "$docker_branch_base"; then
    docker_branch_base="${docker_branch_base%-dev}"
    docker_branch_base="${docker_branch_base%-cluster}"
    if ! git branch -a | grep -q "$docker_branch_base"; then
        docker_branch_base="${docker_branch_base%-*}"
    fi
    # solrcloud -> solr
    docker_branch_base="${docker_branch_base%cloud}"
    if ! git branch -a | grep -q "$docker_branch_base"; then
        if [ "$SOURCE_BRANCH" = "master" ]; then
            echo "WARNING: cannot determine if docker branch base '$docker_branch_base' is correct while running on master branch (DockerHub only checks out master branch)"
        else
            echo "ERROR: failed to determine docker branch base even after normalization rules, tried '$docker_branch_base'"
            echo
            echo "These are the branches found:"
            echo
            git branch -a
            exit 1
        fi
    fi
fi
echo "determined docker branch base = $docker_branch_base"
docker_version_base="${docker_branch_base//-/_}"
echo "will search Dockerfile using docker version base = $docker_version_base"
if [ "$docker_branch_base" = "kafka" ]; then
    highest_tag="${current_scala}_0.9"
    echo "Kafka detected, overriding highest tag to be $highest_tag"
else
    # This depends on the Dockerfiles content and version ordering being correct, but this also causes a lot of merge conflicts when merging improvements from master so not storing all previous versions commented out any more as it's more of an inconvenience
    #highest_tag="$(grep -i ARG.*${docker_version_base}_VERSION= Dockerfile | sed -n '$s/.*=//p' | awk -F. '{print $1"."$2}')"
    # use Git branch versions as the source of truth
    branches="$(git branch -a | grep -i -e "[[:space:]]$docker_branch_base\\>" -e "remotes/.*/$docker_branch_base\\>" || :)"
    if [ -n "$branches" ]; then
        #if [ "$(wc -l <<< "$branches")" -ge 2 ]; then
        highest_tag="$(sed 's/.*\/// ; s/.*-\([[:digit:]]\)/\1/' <<< "$branches" | sed 's/\./ /g' | sort -n -k1 -k2 | sed 's/ /./g' | tail -n 1)"
        echo "determined highest tag from git branches to be '$highest_tag'"
        #else
        #    echo "WARNING: less than 2 matching branches found, cannot determine the highest tag"
        #fi
    fi
fi

current_tag=""
if ! grep ':' <<< "$IMAGE_NAME"; then
    echo "current tag not found in IMAGE_NAME=$IMAGE_NAME, determining from Dockerfile"
    current_tag="$(grep -i "^[[:space:]]*ARG.*${docker_version_base}.*_VERSION=" "$srcdir/../Dockerfile" | sed -n '$s/.*=//;s/[[:space:]]*//g;s/"//g;p' || :)"
    if [ -z "$current_tag" ]; then
        echo "ERROR: failed to determine current tag from Dockerfile!"
        exit 1
    fi
    if [ "$current_tag" != "latest" ]; then
        current_tag="$(awk -F. '{print $1"."$2}' <<< "$current_tag")"
    fi
    if [ "$docker_branch_base" = "kafka" ]; then
        current_tag="${current_scala}_$current_tag"
    fi
    echo "current tag determined from Dockerfile to be '$current_tag'"
    IMAGE_NAME="$IMAGE_NAME:$current_tag"
fi
if [ -z "$current_tag" ]; then
    echo "WARNING: failed to determine current tag - IMAGE_NAME '$IMAGE_NAME' did not contain version?"
    exit 0
fi

is_latest(){
    if [ "$SOURCE_BRANCH" = "master" ]; then
        echo "determined to be latest build, due to running on master branch"
        return 0
    elif [ "$SOURCE_BRANCH" = "$docker_branch_base-$highest_tag" ]; then
        echo "determined to be highest tagged build, due to SOURCE_BRANCH = '$SOURCE_BRANCH' == \$docker_branch_base-\$highest_tag = '$docker_branch_base-$highest_tag'"
        return 0
    fi
    echo "$SOURCE_BRANCH is not the latest build, which is $docker_branch_base-$highest_tag"
    return 1
}

if is_latest; then
    if [ "$current_tag" = "latest" ] &&
       [ -x "$srcdir/../get_latest_version.sh" ]; then
        newest_version="$("$srcdir/../get_latest_version.sh")"
        echo "docker tag \"$IMAGE_NAME\" \"$DOCKER_REPO:$newest_version\""
        docker tag "$IMAGE_NAME" "$DOCKER_REPO:$newest_version"
        echo "docker push \"$DOCKER_REPO:$newest_version\""
        docker push "$DOCKER_REPO:$newest_version"
    else
        echo "docker tag \"$IMAGE_NAME\" \"$DOCKER_REPO:latest\""
        docker tag "$IMAGE_NAME" "$DOCKER_REPO:latest"
        echo "docker push \"$DOCKER_REPO:latest\""
        docker push "$DOCKER_REPO:latest"
    fi
fi
hadoop/hadoop/profile.d/hadoop.sh (new file, 3 lines)
#!/usr/bin/env bash

export HADOOP_HOME=/hadoop
hadoop/hadoop/ssh/config (new file, 2 lines)
Host *
    StrictHostKeyChecking no
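This SSH client config, together with the host keys and user keys the entrypoint generates, is what lets start-dfs.sh and start-yarn.sh ssh into localhost without prompting. A quick hedged check inside a running container (the BatchMode option just makes any failure non-interactive; the command itself is illustrative, not part of the commit):

```
# should succeed silently if the passwordless-SSH wiring above is working
ssh -o BatchMode=yes localhost true && echo "passwordless SSH to localhost OK"
```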