HADOOP-18135. Produce Windows binaries of Hadoop (#6673)

This PR enables one to create the Hadoop
release tarball on Windows, complete with
the native binaries (including winutils.exe).
This PR contains the following changes -

* Splits the extra Maven arguments into
  separate words during array expansion -
  this is needed so that each argument is
  passed to maven correctly.
* Installs Python 3.11.8 and pip in the
  Windows Docker image used for building
  Hadoop.
* Changes the pom file so that maven invokes
  the releasedocmaker script through
  bash.exe on Windows.
This commit is contained in:
Gautham B A 2024-04-09 22:15:05 +05:30 committed by GitHub
parent 3f8af73913
commit f7bb4f1595
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 142 additions and 13 deletions

View File

@ -653,6 +653,20 @@ container.
-Dwindows.cmake.toolchain.file=C:\vcpkg\scripts\buildsystems\vcpkg.cmake -Dwindows.cmake.build.type=RelWithDebInfo^
-Dwindows.build.hdfspp.dll=off -Dwindows.no.sasl=on -Duse.platformToolsetVersion=v142
Building the release tarball:
Assuming that we're still running in the Docker container hadoop-windows-10-builder, run the
following commands to create the Apache Hadoop release tarball -
> set IS_WINDOWS=1
> set MVN_ARGS="-Dshell-executable=C:\Git\bin\bash.exe -Dhttps.protocols=TLSv1.2 -Pnative-win -Drequire.openssl -Dopenssl.prefix=C:\vcpkg\installed\x64-windows -Dcmake.prefix.path=C:\vcpkg\installed\x64-windows -Dwindows.cmake.toolchain.file=C:\vcpkg\scripts\buildsystems\vcpkg.cmake -Dwindows.cmake.build.type=RelWithDebInfo -Dwindows.build.hdfspp.dll=off -Duse.platformToolsetVersion=v142 -Dwindows.no.sasl=on -DskipTests -DskipDocs -Drequire.test.libhadoop"
> C:\Git\bin\bash.exe C:\hadoop\dev-support\bin\create-release --mvnargs=%MVN_ARGS%
Note:
If the build fails due to an issue with long paths, rename the Hadoop root directory to a
single letter (like 'h') and rebuild -
> C:\Git\bin\bash.exe C:\h\dev-support\bin\create-release --mvnargs=%MVN_ARGS%
----------------------------------------------------------------------------------
Building distributions:

View File

@ -418,7 +418,8 @@ function option_parse
fi
fi
if [ -n "$MVNEXTRAARGS" ]; then
MVN_ARGS+=("$MVNEXTRAARGS")
# shellcheck disable=SC2206
MVN_ARGS+=(${MVNEXTRAARGS[*]})
fi
if [[ "${SECURITYRELEASE}" = true ]]; then
@ -552,10 +553,12 @@ function makearelease
mkdir -p "${LOGDIR}"
# Install the Hadoop maven plugins first
run_and_redirect "${LOGDIR}/mvn_install_maven_plugins.log" "${MVN}" "${MVN_ARGS[@]}" -pl hadoop-maven-plugins -am clean install
# shellcheck disable=SC2086
run_and_redirect "${LOGDIR}/mvn_install_maven_plugins.log" "${MVN}" ${MVN_ARGS[*]} -pl hadoop-maven-plugins -am clean install
# mvn clean for sanity
run_and_redirect "${LOGDIR}/mvn_clean.log" "${MVN}" "${MVN_ARGS[@]}" clean
# shellcheck disable=SC2086
run_and_redirect "${LOGDIR}/mvn_clean.log" "${MVN}" ${MVN_ARGS[*]} clean
# Create staging dir for release artifacts
run mkdir -p "${ARTIFACTS_DIR}"
@ -563,7 +566,8 @@ function makearelease
big_console_header "Apache RAT Check"
# Create RAT report
run_and_redirect "${LOGDIR}/mvn_apache_rat.log" "${MVN}" "${MVN_ARGS[@]}" apache-rat:check
# shellcheck disable=SC2086
run_and_redirect "${LOGDIR}/mvn_apache_rat.log" "${MVN}" ${MVN_ARGS[*]} apache-rat:check
big_console_header "Maven Build and Install"
@ -577,9 +581,9 @@ function makearelease
fi
# Create SRC and BIN tarballs for release,
# shellcheck disable=SC2046
# shellcheck disable=SC2046,SC2086
run_and_redirect "${LOGDIR}/mvn_${target}.log" \
"${MVN}" "${MVN_ARGS[@]}" ${target} \
"${MVN}" ${MVN_ARGS[*]} ${target} \
-Pdist,src,yarn-ui \
"${signflags[@]}" \
-DskipTests -Dtar $(hadoop_native_flags)
@ -608,8 +612,9 @@ function makearelease
# we need to do install again so that jdiff and
# a few other things get registered in the maven
# universe correctly
# shellcheck disable=SC2206,SC2086
run_and_redirect "${LOGDIR}/mvn_site.log" \
"${MVN}" "${MVN_ARGS[@]}" install \
"${MVN}" ${MVN_ARGS[*]} install \
site site:stage \
-DskipTests \
-DskipShade \

View File

@ -102,10 +102,21 @@ RUN powershell Copy-Item -Path "C:\LibXXHash\usr\bin\*.dll" -Destination "C:\Pro
RUN powershell Copy-Item -Path "C:\LibZStd\usr\bin\*.dll" -Destination "C:\Program` Files\Git\usr\bin"
RUN powershell Copy-Item -Path "C:\RSync\usr\bin\*" -Destination "C:\Program` Files\Git\usr\bin"
# Install Python 3.10.11.
RUN powershell Invoke-WebRequest -Uri https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip -OutFile $Env:TEMP\python-3.10.11-embed-amd64.zip
RUN powershell Expand-Archive -Path $Env:TEMP\python-3.10.11-embed-amd64.zip -DestinationPath "C:\Python3"
RUN powershell New-Item -ItemType HardLink -Value "C:\Python3\python.exe" -Path "C:\Python3\python3.exe"
COPY pkg-resolver pkg-resolver
## Install Python 3.11.8.
# The Python installation steps below are derived from -
# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile
ENV PYTHONIOENCODING UTF-8
ENV PYTHON_VERSION 3.11.8
ENV PYTHON_PIP_VERSION 24.0
ENV PYTHON_SETUPTOOLS_VERSION 65.5.1
ENV PYTHON_GET_PIP_URL https://github.com/pypa/get-pip/raw/dbf0c85f76fb6e1ab42aa672ffca6f0a675d9ee4/public/get-pip.py
ENV PYTHON_GET_PIP_SHA256 dfe9fd5c28dc98b5ac17979a953ea550cec37ae1b47a5116007395bfacff2ab9
RUN powershell Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
RUN powershell pkg-resolver\install-python.ps1
RUN powershell pkg-resolver\install-pip.ps1
RUN powershell pip install python-dateutil
# Create a user HadoopBuilder with basic privileges and use it for building Hadoop on Windows.
RUN powershell New-LocalUser -Name 'HadoopBuilder' -Description 'User account for building Apache Hadoop' -Password ([securestring]::new()) -AccountNeverExpires -PasswordNeverExpires
@ -121,12 +132,12 @@ USER HadoopBuilder
ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"
ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
ENV MAVEN_OPTS '-Xmx2048M -Xss128M'
ENV IS_WINDOWS 1
RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
RUN setx PATH "%PATH%;%JAVA_HOME%\bin"
RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"
RUN setx PATH "%PATH%;C:\ZStd"
RUN setx path "%PATH%;C:\Python3"
RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin"
# We get strange Javadoc errors without this.

View File

@ -0,0 +1,44 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code lines below are derived from -
# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile
# Downloads the get-pip.py bootstrap script, verifies its SHA-256 digest and
# uses it to install specific pip and setuptools versions.
#
# Environment variables read by this script:
#   PYTHON_GET_PIP_URL        - URL that get-pip.py is downloaded from.
#   PYTHON_GET_PIP_SHA256     - expected SHA-256 digest of the downloaded file.
#   PYTHON_PIP_VERSION        - pip version to install.
#   PYTHON_SETUPTOOLS_VERSION - setuptools version to install.
#
# Exits with a non-zero status when the digest does not match, when
# get-pip.py fails, or when pip cannot be run afterwards.

Write-Host ('Downloading get-pip.py ({0}) ...' -f $Env:PYTHON_GET_PIP_URL)
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
Invoke-WebRequest -Uri $Env:PYTHON_GET_PIP_URL -OutFile 'get-pip.py'

Write-Host ('Verifying sha256 ({0}) ...' -f $Env:PYTHON_GET_PIP_SHA256)
if ((Get-FileHash 'get-pip.py' -Algorithm sha256).Hash -ne $Env:PYTHON_GET_PIP_SHA256) {
    Write-Host 'FAILED!'
    exit 1
}

# Keep Python from writing .pyc files while the bootstrap script runs.
$Env:PYTHONDONTWRITEBYTECODE = '1'

Write-Host ('Installing pip=={0} ...' -f $Env:PYTHON_PIP_VERSION)
python get-pip.py `
    --disable-pip-version-check `
    --no-cache-dir `
    --no-compile `
    ('pip=={0}' -f $Env:PYTHON_PIP_VERSION) `
    ('setuptools=={0}' -f $Env:PYTHON_SETUPTOOLS_VERSION)
# Capture the status before any other command can overwrite it; a native
# command failure does not stop the script on its own.
$getPipExitCode = $LASTEXITCODE
Remove-Item get-pip.py -Force
if ($getPipExitCode -ne 0) {
    Write-Host ('Running get-pip.py failed with exit code: {0}' -f $getPipExitCode)
    exit $getPipExitCode
}

Write-Host 'Verifying pip install ...'
# A missing command does not update $LASTEXITCODE, so look pip up first.
if (-not (Get-Command pip -ErrorAction SilentlyContinue)) {
    Write-Host 'pip was not found on PATH after installation.'
    exit 1
}
pip --version
if ($LASTEXITCODE -ne 0) {
    Write-Host ('pip --version failed with exit code: {0}' -f $LASTEXITCODE)
    exit $LASTEXITCODE
}

Write-Host 'Complete.'

View File

@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code lines below are derived from -
# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile
# Downloads the Windows amd64 installer for the Python release named by
# $Env:PYTHON_VERSION, runs it unattended with C:\Python as the target
# directory, and checks that the installed interpreter can be started.
#
# Exits with the installer's exit code when installation fails, and with a
# non-zero status when the interpreter cannot be run afterwards.

# A trailing pre-release suffix (for example the "rc1" in "3.11.0rc1") is
# stripped to form the download directory; the installer file name keeps the
# full version string.
$url = ('https://www.python.org/ftp/python/{0}/python-{1}-amd64.exe' -f ($Env:PYTHON_VERSION -replace '[a-z]+[0-9]*$', ''), $Env:PYTHON_VERSION)
Write-Host ('Downloading {0} ...' -f $url)
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
Invoke-WebRequest -Uri $url -OutFile 'python.exe'

Write-Host 'Installing ...'
# pip is deliberately left out here (Include_pip=0).
$exitCode = (Start-Process python.exe -Wait -NoNewWindow -PassThru `
        -ArgumentList @(
            '/quiet',
            'InstallAllUsers=1',
            'TargetDir=C:\Python',
            'PrependPath=1',
            'Shortcuts=0',
            'Include_doc=0',
            'Include_pip=0',
            'Include_test=0'
        )
).ExitCode
if ($exitCode -ne 0) {
    Write-Host ('Running python installer failed with exit code: {0}' -f $exitCode)
    # Print the most recently written item in TEMP - presumably the
    # installer log; verify if the output looks unrelated.
    Get-ChildItem $Env:TEMP | Sort-Object -Descending -Property LastWriteTime | Select-Object -First 1 | Get-Content
    exit $exitCode
}

# the installer updated PATH, so we should refresh our local value
$Env:PATH = [Environment]::GetEnvironmentVariable('PATH', [EnvironmentVariableTarget]::Machine)

Write-Host 'Verifying install ...'
# A missing command does not update $LASTEXITCODE, so look python up first.
if (-not (Get-Command python -ErrorAction SilentlyContinue)) {
    Write-Host 'python was not found on PATH after installation.'
    exit 1
}
$installedVersion = python --version
if ($LASTEXITCODE -ne 0) {
    Write-Host ('python --version failed with exit code: {0}' -f $LASTEXITCODE)
    exit $LASTEXITCODE
}
Write-Host "python --version $installedVersion"

Write-Host 'Removing ...'
Remove-Item python.exe -Force
Remove-Item $Env:TEMP\Python*.log -Force

Write-Host 'Complete.'

View File

@ -1095,8 +1095,9 @@
<goal>exec</goal>
</goals>
<configuration>
<executable>${basedir}/../../dev-support/bin/releasedocmaker</executable>
<executable>${shell-executable}</executable>
<arguments>
<argument>${basedir}/../../dev-support/bin/releasedocmaker</argument>
<argument>--index</argument>
<argument>--license</argument>
<argument>--outputdir</argument>