HADOOP-18682. Move hadoop docker scripts under the main source code (#6483). Contributed by Christos Bisias.
This commit is contained in:
parent
e4789a2fd3
commit
66baf1eb51
@ -0,0 +1,68 @@
|
||||
<!---
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
## Hadoop Docker
|
||||
|
||||
### Running from existing setups
|
||||
|
||||
The Apache Hadoop repository has dedicated branches for running Hadoop in Docker.
|
||||
|
||||
The `docker-hadoop-runner*` branches contain scripts that set up base images that can be used for running any Hadoop version.
|
||||
|
||||
* [docker-hadoop-runner-latest](https://github.com/apache/hadoop/tree/docker-hadoop-runner-latest)
|
||||
* [docker-hadoop-runner-jdk11](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk11)
|
||||
* [docker-hadoop-runner-jdk8](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk8)
|
||||
* [docker-hadoop-runner](https://github.com/apache/hadoop/tree/docker-hadoop-runner)
|
||||
|
||||
The `docker-hadoop*` branches can be used for running a specific version.
|
||||
|
||||
* [docker-hadoop-3](https://github.com/apache/hadoop/tree/docker-hadoop-3)
|
||||
* `hadoop-3.3.6`
|
||||
* [docker-hadoop-2](https://github.com/apache/hadoop/tree/docker-hadoop-2)
|
||||
* `hadoop-2.10.2`
|
||||
|
||||
### Running from the source code
|
||||
|
||||
There is a setup under `hadoop-dist` that contains Docker Compose definitions
|
||||
for running the current version of Hadoop in a multi-node docker environment.
|
||||
|
||||
This is meant for testing code changes locally and debugging.
|
||||
|
||||
The base image used by the Docker setup is built as part of the maven lifecycle.
|
||||
The distribution files generated while building the project with the `-Pdist` profile enabled
|
||||
will be used for running hadoop inside the containers.
|
||||
|
||||
To start the Docker environment, do the following:
|
||||
* Build the project, using the `-Pdist` profile
|
||||
```shell
|
||||
> mvn clean install -Dmaven.javadoc.skip=true -DskipTests -DskipShade -Pdist,src
|
||||
```
|
||||
* From the project root, change into the Docker Compose directory inside the generated dist directory
|
||||
```shell
|
||||
> cd hadoop-dist/target/hadoop-<current-version>/compose/hadoop
|
||||
```
|
||||
* Start the docker environment
|
||||
```shell
|
||||
> docker-compose up -d --scale datanode=3
|
||||
```
|
||||
* Connect to a container to execute commands
|
||||
```shell
|
||||
> docker exec -it hadoop_datanode_1 bash
|
||||
bash-4.2$ hdfs dfs -mkdir /test
|
||||
```
|
||||
|
||||
### Config files
|
||||
|
||||
To add or remove properties from the `core-site.xml`, `hdfs-site.xml`, etc. files used in the docker environment,
|
||||
simply edit the `config` file before starting the containers. The changes will be persisted in the docker environment.
|
@ -236,3 +236,9 @@ Fully-Distributed Operation
|
||||
---------------------------
|
||||
|
||||
For information on setting up fully-distributed, non-trivial clusters see [Cluster Setup](./ClusterSetup.html).
|
||||
|
||||
Hadoop in Docker containers
|
||||
---------------------------
|
||||
|
||||
For information on setting up Hadoop in Docker, using either official releases or the main source code,
|
||||
check [Hadoop Docker](./HadoopDocker.html).
|
||||
|
@ -29,6 +29,13 @@
|
||||
<name>Apache Hadoop Distribution</name>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<!-- Module-level properties for the distribution build. -->
<properties>
|
||||
<file.encoding>UTF-8</file.encoding>
|
||||
<downloadSources>true</downloadSources>
|
||||
<!-- Tag of the apache/hadoop-runner base image; substituted into src/main/compose/hadoop/.env and src/main/docker/Dockerfile by resource filtering. -->
<docker.hadoop-runner.version>docker-hadoop-runner</docker.hadoop-runner.version>
|
||||
<maven.test.skip>true</maven.test.skip>
|
||||
</properties>
|
||||
|
||||
<!-- Using dependencies to ensure this module is the last one -->
|
||||
<dependencies>
|
||||
<dependency>
|
||||
@ -151,6 +158,43 @@
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<!-- Stages the Docker Compose definitions and the Dockerfile into the generated distribution directory during the package phase. -->
<plugin>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<executions>
|
||||
<!-- Copies src/main/compose into hadoop-${project.version}/compose. Filtering is enabled so that property placeholders such as ${docker.hadoop-runner.version} in the .env file are replaced with their Maven values. -->
<execution>
|
||||
<id>copy-compose-files</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>copy-resources</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<outputDirectory>${project.build.directory}/hadoop-${project.version}/compose</outputDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/compose</directory>
|
||||
<filtering>true</filtering>
|
||||
</resource>
|
||||
</resources>
|
||||
</configuration>
|
||||
</execution>
|
||||
<!-- Copies src/main/docker (the Dockerfile) into the root of hadoop-${project.version}. Filtering replaces the @docker.hadoop-runner.version@ placeholder used in the Dockerfile FROM line. -->
<execution>
|
||||
<id>copy-and-filter-dockerfile</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>copy-resources</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<outputDirectory>${project.build.directory}/hadoop-${project.version}</outputDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/docker</directory>
|
||||
<filtering>true</filtering>
|
||||
</resource>
|
||||
</resources>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
@ -230,6 +274,56 @@
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
<!-- Opt-in profile: runs the fabric8 docker-maven-plugin build goal in the package phase to build an image named ${docker.image}. -->
<profile>
|
||||
<id>docker-build</id>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>build</goal>
|
||||
</goals>
|
||||
<phase>package</phase>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<images>
|
||||
<image>
|
||||
<name>${docker.image}</name>
|
||||
<build>
|
||||
<!-- Same directory the copy-and-filter-dockerfile execution writes the filtered Dockerfile into. -->
<dockerFileDir>
|
||||
${project.build.directory}/hadoop-${project.version}
|
||||
</dockerFileDir>
|
||||
</build>
|
||||
</image>
|
||||
</images>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
<!-- Opt-in profile: runs the fabric8 docker-maven-plugin push goal in the package phase. -->
<!-- NOTE(review): no images are configured in this profile; the push presumably relies on the image configuration contributed by the docker-build profile being active in the same build. Confirm. -->
<profile>
|
||||
<id>docker-push</id>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>push</goal>
|
||||
</goals>
|
||||
<phase>package</phase>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
</profiles>
|
||||
|
||||
</project>
|
||||
|
18
hadoop-dist/src/main/compose/hadoop/.env
Normal file
18
hadoop-dist/src/main/compose/hadoop/.env
Normal file
@ -0,0 +1,18 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Variables for the compose setup in this directory.
# NOTE(review): HADOOP_IMAGE is not referenced by the docker-compose.yaml in this directory; confirm whether it is still needed.
HADOOP_IMAGE=apache/hadoop
|
||||
# Placeholder replaced by Maven resource filtering with the docker.hadoop-runner.version property when the compose files are copied.
HADOOP_RUNNER_VERSION=${docker.hadoop-runner.version}
|
||||
# Repository part of the image reference used by docker-compose.yaml (image: ${HADOOP_RUNNER_IMAGE}:${HADOOP_RUNNER_VERSION}).
HADOOP_RUNNER_IMAGE=apache/hadoop-runner
|
50
hadoop-dist/src/main/compose/hadoop/config
Normal file
50
hadoop-dist/src/main/compose/hadoop/config
Normal file
@ -0,0 +1,50 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Environment file loaded into every service via env_file in docker-compose.yaml.
# Each key has the form <TARGET FILE>_<property name>; presumably the runner image turns these
# entries into the named Hadoop configuration files at container start (verify against the image).
# NOTE(review): fs.default.name appears to be the deprecated alias of fs.defaultFS; both are set
# to the same value here. Confirm whether the legacy key is still required.
CORE-SITE.XML_fs.default.name=hdfs://namenode
|
||||
CORE-SITE.XML_fs.defaultFS=hdfs://namenode
|
||||
|
||||
# "namenode" matches the hostname assigned to the namenode service in docker-compose.yaml.
HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020
|
||||
HDFS-SITE.XML_dfs.replication=1
|
||||
|
||||
MAPRED-SITE.XML_mapreduce.framework.name=yarn
|
||||
MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
|
||||
MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
|
||||
MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
|
||||
|
||||
# "resourcemanager" matches the hostname assigned to the resourcemanager service in docker-compose.yaml.
YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager
|
||||
YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=false
|
||||
YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600
|
||||
YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=false
|
||||
YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle
|
||||
|
||||
# Capacity scheduler: a single "default" queue holding 100% of the capacity.
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=*
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=*
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings=
|
||||
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false
|
||||
|
||||
# Log4j: root logger at INFO writing to a console appender named "stdout".
LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout
|
||||
LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||
LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||
LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
|
46
hadoop-dist/src/main/compose/hadoop/docker-compose.yaml
Normal file
46
hadoop-dist/src/main/compose/hadoop/docker-compose.yaml
Normal file
@ -0,0 +1,46 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
version: "3.8"
|
||||
|
||||
# Shared settings merged into every service below through the common-config anchor.
x-common-config:
|
||||
&common-config
|
||||
# Both variables are defined in the .env file next to this compose file.
image: ${HADOOP_RUNNER_IMAGE}:${HADOOP_RUNNER_VERSION}
|
||||
volumes:
|
||||
# Mounts the directory two levels above this compose directory at /opt/hadoop. In the generated
# dist layout (hadoop-<version>/compose/hadoop) that is the hadoop-<version> directory.
- ../..:/opt/hadoop
|
||||
env_file:
|
||||
# Hadoop properties for the containers are supplied through this file.
- ./config
|
||||
|
||||
# One service per Hadoop daemon; each merges the shared common-config settings and only sets its own command.
services:
|
||||
namenode:
|
||||
<<: *common-config
|
||||
# Fixed hostname: the config file addresses the NameNode as hdfs://namenode and namenode:8020.
hostname: namenode
|
||||
command: ["hdfs", "namenode"]
|
||||
ports:
|
||||
# Publishes container port 9870 on the host (presumably the NameNode web UI; confirm).
- 9870:9870
|
||||
environment:
|
||||
# NOTE(review): presumably read by the runner image's startup script to prepare the NameNode directory; not verifiable from this file.
ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name"
|
||||
# No fixed hostname here; the README starts this service with a scale of 3.
datanode:
|
||||
<<: *common-config
|
||||
command: ["hdfs", "datanode"]
|
||||
resourcemanager:
|
||||
<<: *common-config
|
||||
# Fixed hostname: matches yarn.resourcemanager.hostname in the config file.
hostname: resourcemanager
|
||||
command: ["yarn", "resourcemanager"]
|
||||
ports:
|
||||
# Publishes container port 8088 on the host (presumably the ResourceManager web UI; confirm).
- 8088:8088
|
||||
nodemanager:
|
||||
<<: *common-config
|
||||
command: ["yarn", "nodemanager"]
|
26
hadoop-dist/src/main/docker/Dockerfile
Normal file
26
hadoop-dist/src/main/docker/Dockerfile
Normal file
@ -0,0 +1,26 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Builds a Hadoop image on top of the hadoop-runner base image.
# The @docker.hadoop-runner.version@ placeholder is replaced by Maven resource filtering
# when this file is copied into the distribution directory.
FROM apache/hadoop-runner:@docker.hadoop-runner.version@
|
||||
|
||||
# Copies the whole build context (the directory this Dockerfile was copied into) to /opt/hadoop.
COPY . /opt/hadoop
|
||||
|
||||
WORKDIR /opt/hadoop
|
||||
|
||||
# Switch to root only for the ownership change below.
USER root
|
||||
|
||||
RUN chown -R hadoop:users /opt/hadoop
|
||||
|
||||
# Drop back to the hadoop user for everything that runs in the container.
USER hadoop
|
14
pom.xml
14
pom.xml
@ -82,6 +82,8 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
|
||||
<!-- required as child projects with different version can't use ${project.version} -->
|
||||
<hadoop.version>3.5.0-SNAPSHOT</hadoop.version>
|
||||
|
||||
<docker.image>apache/hadoop:${project.version}</docker.image>
|
||||
|
||||
<distMgmtSnapshotsId>apache.snapshots.https</distMgmtSnapshotsId>
|
||||
<distMgmtSnapshotsName>Apache Development Snapshot Repository</distMgmtSnapshotsName>
|
||||
<distMgmtSnapshotsUrl>https://repository.apache.org/content/repositories/snapshots</distMgmtSnapshotsUrl>
|
||||
@ -119,6 +121,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
|
||||
<jsonschema2pojo-maven-plugin.version>1.1.1</jsonschema2pojo-maven-plugin.version>
|
||||
<maven-compiler-plugin.version>3.10.1</maven-compiler-plugin.version>
|
||||
<cyclonedx.version>2.7.10</cyclonedx.version>
|
||||
<docker-maven-plugin.version>0.29.0</docker-maven-plugin.version>
|
||||
|
||||
<shell-executable>bash</shell-executable>
|
||||
|
||||
@ -150,6 +153,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<version>${maven-dependency-plugin.version}</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
<version>${docker-maven-plugin.version}</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-enforcer-plugin</artifactId>
|
||||
@ -892,5 +900,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
|
||||
</activation>
|
||||
</profile>
|
||||
|
||||
<!-- When the docker-build profile is active, the image is named after the local user (${user.name}/hadoop) instead of the default apache/hadoop value of docker.image. -->
<profile>
|
||||
<id>docker-build</id>
|
||||
<properties>
|
||||
<docker.image>${user.name}/hadoop:${project.version}</docker.image>
|
||||
</properties>
|
||||
</profile>
|
||||
</profiles>
|
||||
</project>
|
||||
|
Loading…
Reference in New Issue
Block a user