HADOOP-18682. Move hadoop docker scripts under the main source code (#6483). Contributed by Christos Bisias.
This commit is contained in:
parent
e4789a2fd3
commit
66baf1eb51
@ -0,0 +1,68 @@
|
|||||||
|
<!---
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. See accompanying LICENSE file.
|
||||||
|
-->
|
||||||
|
|
||||||
|
## Hadoop Docker
|
||||||
|
|
||||||
|
### Running from existing setups
|
||||||
|
|
||||||
|
There are special branches for running Hadoop in Docker.
|
||||||
|
|
||||||
|
The `docker-hadoop-runner*` branches contain scripts that set up base images that can be used for running any Hadoop version.
|
||||||
|
|
||||||
|
* [docker-hadoop-runner-latest](https://github.com/apache/hadoop/tree/docker-hadoop-runner-latest)
|
||||||
|
* [docker-hadoop-runner-jdk11](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk11)
|
||||||
|
* [docker-hadoop-runner-jdk8](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk8)
|
||||||
|
* [docker-hadoop-runner](https://github.com/apache/hadoop/tree/docker-hadoop-runner)
|
||||||
|
|
||||||
|
The `docker-hadoop*` branches can be used for running a specific version.
|
||||||
|
|
||||||
|
* [docker-hadoop-3](https://github.com/apache/hadoop/tree/docker-hadoop-3)
|
||||||
|
* `hadoop-3.3.6`
|
||||||
|
* [docker-hadoop-2](https://github.com/apache/hadoop/tree/docker-hadoop-2)
|
||||||
|
* `hadoop-2.10.2`
|
||||||
|
|
||||||
|
### Running from the source code
|
||||||
|
|
||||||
|
There is a setup under `hadoop-dist` that contains Docker Compose definitions
|
||||||
|
for running the current version of Hadoop in a multi-node docker environment.
|
||||||
|
|
||||||
|
This is meant for testing code changes locally and debugging.
|
||||||
|
|
||||||
|
The base image used by the Docker setup is built as part of the maven lifecycle.
|
||||||
|
The distribution files generated while building the project with the `-Pdist` profile enabled
|
||||||
|
will be used for running hadoop inside the containers.
|
||||||
|
|
||||||
|
In order to start the Docker environment, you need to do the following:
|
||||||
|
* Build the project, using the `-Pdist` profile
|
||||||
|
```shell
|
||||||
|
> mvn clean install -Dmaven.javadoc.skip=true -DskipTests -DskipShade -Pdist,src
|
||||||
|
```
|
||||||
|
* From the project root, navigate to the Docker Compose directory under the generated dist directory
|
||||||
|
```shell
|
||||||
|
> cd hadoop-dist/target/hadoop-<current-version>/compose/hadoop
|
||||||
|
```
|
||||||
|
* Start the docker environment
|
||||||
|
```shell
|
||||||
|
> docker-compose up -d --scale datanode=3
|
||||||
|
```
|
||||||
|
* Connect to a container to execute commands
|
||||||
|
```shell
|
||||||
|
> docker exec -it hadoop_datanode_1 bash
|
||||||
|
bash-4.2$ hdfs dfs -mkdir /test
|
||||||
|
```
|
||||||
|
|
||||||
|
### Config files
|
||||||
|
|
||||||
|
To add or remove properties from the `core-site.xml`, `hdfs-site.xml`, etc. files used in the docker environment,
|
||||||
|
simply edit the `config` file before starting the containers. The changes will be persisted in the docker environment.
|
@ -236,3 +236,9 @@ Fully-Distributed Operation
|
|||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
For information on setting up fully-distributed, non-trivial clusters see [Cluster Setup](./ClusterSetup.html).
|
For information on setting up fully-distributed, non-trivial clusters see [Cluster Setup](./ClusterSetup.html).
|
||||||
|
|
||||||
|
Hadoop in Docker containers
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
For information on setting up Hadoop in Docker, using either official releases or the main source code,
|
||||||
|
see [Hadoop Docker](./HadoopDocker.html).
|
||||||
|
@ -29,6 +29,13 @@
|
|||||||
<name>Apache Hadoop Distribution</name>
|
<name>Apache Hadoop Distribution</name>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<file.encoding>UTF-8</file.encoding>
|
||||||
|
<downloadSources>true</downloadSources>
|
||||||
|
<docker.hadoop-runner.version>docker-hadoop-runner</docker.hadoop-runner.version>
|
||||||
|
<maven.test.skip>true</maven.test.skip>
|
||||||
|
</properties>
|
||||||
|
|
||||||
<!-- Using dependencies to ensure this module is the last one -->
|
<!-- Using dependencies to ensure this module is the last one -->
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
@ -151,6 +158,43 @@
|
|||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-resources-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>copy-compose-files</id>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>copy-resources</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<outputDirectory>${project.build.directory}/hadoop-${project.version}/compose</outputDirectory>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>src/main/compose</directory>
|
||||||
|
<filtering>true</filtering>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>copy-and-filter-dockerfile</id>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>copy-resources</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<outputDirectory>${project.build.directory}/hadoop-${project.version}</outputDirectory>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>src/main/docker</directory>
|
||||||
|
<filtering>true</filtering>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
@ -230,6 +274,56 @@
|
|||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
</profile>
|
</profile>
|
||||||
|
<profile>
|
||||||
|
<id>docker-build</id>
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>io.fabric8</groupId>
|
||||||
|
<artifactId>docker-maven-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<goals>
|
||||||
|
<goal>build</goal>
|
||||||
|
</goals>
|
||||||
|
<phase>package</phase>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<images>
|
||||||
|
<image>
|
||||||
|
<name>${docker.image}</name>
|
||||||
|
<build>
|
||||||
|
<dockerFileDir>
|
||||||
|
${project.build.directory}/hadoop-${project.version}
|
||||||
|
</dockerFileDir>
|
||||||
|
</build>
|
||||||
|
</image>
|
||||||
|
</images>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
<profile>
|
||||||
|
<id>docker-push</id>
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>io.fabric8</groupId>
|
||||||
|
<artifactId>docker-maven-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<goals>
|
||||||
|
<goal>push</goal>
|
||||||
|
</goals>
|
||||||
|
<phase>package</phase>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
18
hadoop-dist/src/main/compose/hadoop/.env
Normal file
18
hadoop-dist/src/main/compose/hadoop/.env
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
HADOOP_IMAGE=apache/hadoop
|
||||||
|
HADOOP_RUNNER_VERSION=${docker.hadoop-runner.version}
|
||||||
|
HADOOP_RUNNER_IMAGE=apache/hadoop-runner
|
50
hadoop-dist/src/main/compose/hadoop/config
Normal file
50
hadoop-dist/src/main/compose/hadoop/config
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
CORE-SITE.XML_fs.default.name=hdfs://namenode
|
||||||
|
CORE-SITE.XML_fs.defaultFS=hdfs://namenode
|
||||||
|
|
||||||
|
HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020
|
||||||
|
HDFS-SITE.XML_dfs.replication=1
|
||||||
|
|
||||||
|
MAPRED-SITE.XML_mapreduce.framework.name=yarn
|
||||||
|
MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
|
||||||
|
MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
|
||||||
|
MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
|
||||||
|
|
||||||
|
YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager
|
||||||
|
YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=false
|
||||||
|
YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600
|
||||||
|
YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=false
|
||||||
|
YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle
|
||||||
|
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=*
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=*
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings=
|
||||||
|
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false
|
||||||
|
|
||||||
|
LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout
|
||||||
|
LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||||
|
LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||||
|
LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
|
46
hadoop-dist/src/main/compose/hadoop/docker-compose.yaml
Normal file
46
hadoop-dist/src/main/compose/hadoop/docker-compose.yaml
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
version: "3.8"
|
||||||
|
|
||||||
|
x-common-config:
|
||||||
|
&common-config
|
||||||
|
image: ${HADOOP_RUNNER_IMAGE}:${HADOOP_RUNNER_VERSION}
|
||||||
|
volumes:
|
||||||
|
- ../..:/opt/hadoop
|
||||||
|
env_file:
|
||||||
|
- ./config
|
||||||
|
|
||||||
|
services:
|
||||||
|
namenode:
|
||||||
|
<<: *common-config
|
||||||
|
hostname: namenode
|
||||||
|
command: ["hdfs", "namenode"]
|
||||||
|
ports:
|
||||||
|
- 9870:9870
|
||||||
|
environment:
|
||||||
|
ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name"
|
||||||
|
datanode:
|
||||||
|
<<: *common-config
|
||||||
|
command: ["hdfs", "datanode"]
|
||||||
|
resourcemanager:
|
||||||
|
<<: *common-config
|
||||||
|
hostname: resourcemanager
|
||||||
|
command: ["yarn", "resourcemanager"]
|
||||||
|
ports:
|
||||||
|
- 8088:8088
|
||||||
|
nodemanager:
|
||||||
|
<<: *common-config
|
||||||
|
command: ["yarn", "nodemanager"]
|
26
hadoop-dist/src/main/docker/Dockerfile
Normal file
26
hadoop-dist/src/main/docker/Dockerfile
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
FROM apache/hadoop-runner:@docker.hadoop-runner.version@
|
||||||
|
|
||||||
|
COPY . /opt/hadoop
|
||||||
|
|
||||||
|
WORKDIR /opt/hadoop
|
||||||
|
|
||||||
|
USER root
|
||||||
|
|
||||||
|
RUN chown -R hadoop:users /opt/hadoop
|
||||||
|
|
||||||
|
USER hadoop
|
14
pom.xml
14
pom.xml
@ -82,6 +82,8 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
|
|||||||
<!-- required as child projects with different version can't use ${project.version} -->
|
<!-- required as child projects with different version can't use ${project.version} -->
|
||||||
<hadoop.version>3.5.0-SNAPSHOT</hadoop.version>
|
<hadoop.version>3.5.0-SNAPSHOT</hadoop.version>
|
||||||
|
|
||||||
|
<docker.image>apache/hadoop:${project.version}</docker.image>
|
||||||
|
|
||||||
<distMgmtSnapshotsId>apache.snapshots.https</distMgmtSnapshotsId>
|
<distMgmtSnapshotsId>apache.snapshots.https</distMgmtSnapshotsId>
|
||||||
<distMgmtSnapshotsName>Apache Development Snapshot Repository</distMgmtSnapshotsName>
|
<distMgmtSnapshotsName>Apache Development Snapshot Repository</distMgmtSnapshotsName>
|
||||||
<distMgmtSnapshotsUrl>https://repository.apache.org/content/repositories/snapshots</distMgmtSnapshotsUrl>
|
<distMgmtSnapshotsUrl>https://repository.apache.org/content/repositories/snapshots</distMgmtSnapshotsUrl>
|
||||||
@ -119,6 +121,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
|
|||||||
<jsonschema2pojo-maven-plugin.version>1.1.1</jsonschema2pojo-maven-plugin.version>
|
<jsonschema2pojo-maven-plugin.version>1.1.1</jsonschema2pojo-maven-plugin.version>
|
||||||
<maven-compiler-plugin.version>3.10.1</maven-compiler-plugin.version>
|
<maven-compiler-plugin.version>3.10.1</maven-compiler-plugin.version>
|
||||||
<cyclonedx.version>2.7.10</cyclonedx.version>
|
<cyclonedx.version>2.7.10</cyclonedx.version>
|
||||||
|
<docker-maven-plugin.version>0.29.0</docker-maven-plugin.version>
|
||||||
|
|
||||||
<shell-executable>bash</shell-executable>
|
<shell-executable>bash</shell-executable>
|
||||||
|
|
||||||
@ -150,6 +153,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
|
|||||||
<artifactId>maven-dependency-plugin</artifactId>
|
<artifactId>maven-dependency-plugin</artifactId>
|
||||||
<version>${maven-dependency-plugin.version}</version>
|
<version>${maven-dependency-plugin.version}</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>io.fabric8</groupId>
|
||||||
|
<artifactId>docker-maven-plugin</artifactId>
|
||||||
|
<version>${docker-maven-plugin.version}</version>
|
||||||
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-enforcer-plugin</artifactId>
|
<artifactId>maven-enforcer-plugin</artifactId>
|
||||||
@ -892,5 +900,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
|
|||||||
</activation>
|
</activation>
|
||||||
</profile>
|
</profile>
|
||||||
|
|
||||||
|
<profile>
|
||||||
|
<id>docker-build</id>
|
||||||
|
<properties>
|
||||||
|
<docker.image>${user.name}/hadoop:${project.version}</docker.image>
|
||||||
|
</properties>
|
||||||
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
Loading…
Reference in New Issue
Block a user