From f7bb4f1595666d5bbae1967f3ff6cf44ae7e47c2 Mon Sep 17 00:00:00 2001 From: Gautham B A Date: Tue, 9 Apr 2024 22:15:05 +0530 Subject: [PATCH] HADOOP-18135. Produce Windows binaries of Hadoop (#6673) This PR enables one to create the Hadoop release tarball on Windows, complete with the native binaries (including winutils.exe). This PR contains the following changes - * Allows word splitting when the maven argument arrays are expanded in create-release - this is needed so that the space-separated options given through --mvnargs are passed to maven as separate arguments. * Installs Python 3.11.8 and pip in the Windows docker image for building Hadoop. * Changes the pom file to get maven to invoke the releasedocmaker script through bash.exe on Windows. --- BUILDING.txt | 14 +++++ dev-support/bin/create-release | 19 ++++--- dev-support/docker/Dockerfile_windows_10 | 21 ++++++-- .../docker/pkg-resolver/install-pip.ps1 | 44 +++++++++++++++ .../docker/pkg-resolver/install-python.ps1 | 54 +++++++++++++++++++ hadoop-common-project/hadoop-common/pom.xml | 3 +- 6 files changed, 142 insertions(+), 13 deletions(-) create mode 100644 dev-support/docker/pkg-resolver/install-pip.ps1 create mode 100644 dev-support/docker/pkg-resolver/install-python.ps1 diff --git a/BUILDING.txt b/BUILDING.txt index 3d35007c9c..ceac4213b9 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -653,6 +653,20 @@ container. 
-Dwindows.cmake.toolchain.file=C:\vcpkg\scripts\buildsystems\vcpkg.cmake -Dwindows.cmake.build.type=RelWithDebInfo^ -Dwindows.build.hdfspp.dll=off -Dwindows.no.sasl=on -Duse.platformToolsetVersion=v142 +Building the release tarball: +Assuming that we're still running in the Docker container hadoop-windows-10-builder, run the +following command to create the Apache Hadoop release tarball - + +> set IS_WINDOWS=1 +> set MVN_ARGS="-Dshell-executable=C:\Git\bin\bash.exe -Dhttps.protocols=TLSv1.2 -Pnative-win -Drequire.openssl -Dopenssl.prefix=C:\vcpkg\installed\x64-windows -Dcmake.prefix.path=C:\vcpkg\installed\x64-windows -Dwindows.cmake.toolchain.file=C:\vcpkg\scripts\buildsystems\vcpkg.cmake -Dwindows.cmake.build.type=RelWithDebInfo -Dwindows.build.hdfspp.dll=off -Duse.platformToolsetVersion=v142 -Dwindows.no.sasl=on -DskipTests -DskipDocs -Drequire.test.libhadoop" +> C:\Git\bin\bash.exe C:\hadoop\dev-support\bin\create-release --mvnargs=%MVN_ARGS% + +Note: +If the build fails due to an issue with long paths, rename the Hadoop root directory to just a +letter (like 'h') and rebuild - + +> C:\Git\bin\bash.exe C:\h\dev-support\bin\create-release --mvnargs=%MVN_ARGS% + ---------------------------------------------------------------------------------- Building distributions: diff --git a/dev-support/bin/create-release b/dev-support/bin/create-release index 274250f0b7..8cdcc14acf 100755 --- a/dev-support/bin/create-release +++ b/dev-support/bin/create-release @@ -418,7 +418,8 @@ function option_parse fi fi if [ -n "$MVNEXTRAARGS" ]; then - MVN_ARGS+=("$MVNEXTRAARGS") + # shellcheck disable=SC2206 + MVN_ARGS+=(${MVNEXTRAARGS[*]}) fi if [[ "${SECURITYRELEASE}" = true ]]; then @@ -552,10 +553,12 @@ function makearelease mkdir -p "${LOGDIR}" # Install the Hadoop maven plugins first - run_and_redirect "${LOGDIR}/mvn_install_maven_plugins.log" "${MVN}" "${MVN_ARGS[@]}" -pl hadoop-maven-plugins -am clean install + # shellcheck disable=SC2086 + run_and_redirect 
"${LOGDIR}/mvn_install_maven_plugins.log" "${MVN}" ${MVN_ARGS[*]} -pl hadoop-maven-plugins -am clean install # mvn clean for sanity - run_and_redirect "${LOGDIR}/mvn_clean.log" "${MVN}" "${MVN_ARGS[@]}" clean + # shellcheck disable=SC2086 + run_and_redirect "${LOGDIR}/mvn_clean.log" "${MVN}" ${MVN_ARGS[*]} clean # Create staging dir for release artifacts run mkdir -p "${ARTIFACTS_DIR}" @@ -563,7 +566,8 @@ function makearelease big_console_header "Apache RAT Check" # Create RAT report - run_and_redirect "${LOGDIR}/mvn_apache_rat.log" "${MVN}" "${MVN_ARGS[@]}" apache-rat:check + # shellcheck disable=SC2086 + run_and_redirect "${LOGDIR}/mvn_apache_rat.log" "${MVN}" ${MVN_ARGS[*]} apache-rat:check big_console_header "Maven Build and Install" @@ -577,9 +581,9 @@ function makearelease fi # Create SRC and BIN tarballs for release, - # shellcheck disable=SC2046 + # shellcheck disable=SC2046,SC2086 run_and_redirect "${LOGDIR}/mvn_${target}.log" \ - "${MVN}" "${MVN_ARGS[@]}" ${target} \ + "${MVN}" ${MVN_ARGS[*]} ${target} \ -Pdist,src,yarn-ui \ "${signflags[@]}" \ -DskipTests -Dtar $(hadoop_native_flags) @@ -608,8 +612,9 @@ function makearelease # we need to do install again so that jdiff and # a few other things get registered in the maven # universe correctly + # shellcheck disable=SC2206,SC2086 run_and_redirect "${LOGDIR}/mvn_site.log" \ - "${MVN}" "${MVN_ARGS[@]}" install \ + "${MVN}" ${MVN_ARGS[*]} install \ site site:stage \ -DskipTests \ -DskipShade \ diff --git a/dev-support/docker/Dockerfile_windows_10 b/dev-support/docker/Dockerfile_windows_10 index 105529c5d6..cde224d8a4 100644 --- a/dev-support/docker/Dockerfile_windows_10 +++ b/dev-support/docker/Dockerfile_windows_10 @@ -102,10 +102,21 @@ RUN powershell Copy-Item -Path "C:\LibXXHash\usr\bin\*.dll" -Destination "C:\Pro RUN powershell Copy-Item -Path "C:\LibZStd\usr\bin\*.dll" -Destination "C:\Program` Files\Git\usr\bin" RUN powershell Copy-Item -Path "C:\RSync\usr\bin\*" -Destination "C:\Program` 
Files\Git\usr\bin" -# Install Python 3.10.11. -RUN powershell Invoke-WebRequest -Uri https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip -OutFile $Env:TEMP\python-3.10.11-embed-amd64.zip -RUN powershell Expand-Archive -Path $Env:TEMP\python-3.10.11-embed-amd64.zip -DestinationPath "C:\Python3" -RUN powershell New-Item -ItemType HardLink -Value "C:\Python3\python.exe" -Path "C:\Python3\python3.exe" +COPY pkg-resolver pkg-resolver + +## Install Python 3.11.8. +# The Python installation steps below are derived from - +# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile +ENV PYTHONIOENCODING UTF-8 +ENV PYTHON_VERSION 3.11.8 +ENV PYTHON_PIP_VERSION 24.0 +ENV PYTHON_SETUPTOOLS_VERSION 65.5.1 +ENV PYTHON_GET_PIP_URL https://github.com/pypa/get-pip/raw/dbf0c85f76fb6e1ab42aa672ffca6f0a675d9ee4/public/get-pip.py +ENV PYTHON_GET_PIP_SHA256 dfe9fd5c28dc98b5ac17979a953ea550cec37ae1b47a5116007395bfacff2ab9 +RUN powershell Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser +RUN powershell pkg-resolver\install-python.ps1 +RUN powershell pkg-resolver\install-pip.ps1 +RUN powershell pip install python-dateutil # Create a user HadoopBuilder with basic privileges and use it for building Hadoop on Windows. 
RUN powershell New-LocalUser -Name 'HadoopBuilder' -Description 'User account for building Apache Hadoop' -Password ([securestring]::new()) -AccountNeverExpires -PasswordNeverExpires @@ -121,12 +132,12 @@ USER HadoopBuilder ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows" ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64" ENV MAVEN_OPTS '-Xmx2048M -Xss128M' +ENV IS_WINDOWS 1 RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin" RUN setx PATH "%PATH%;%JAVA_HOME%\bin" RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin" RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin" RUN setx PATH "%PATH%;C:\ZStd" -RUN setx path "%PATH%;C:\Python3" RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin" # We get strange Javadoc errors without this. diff --git a/dev-support/docker/pkg-resolver/install-pip.ps1 b/dev-support/docker/pkg-resolver/install-pip.ps1 new file mode 100644 index 0000000000..c56ac70549 --- /dev/null +++ b/dev-support/docker/pkg-resolver/install-pip.ps1 @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# The code lines below are derived from - +# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile + +Write-Host ('Downloading get-pip.py ({0}) ...' -f $Env:PYTHON_GET_PIP_URL) +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 +Invoke-WebRequest -Uri $Env:PYTHON_GET_PIP_URL -OutFile 'get-pip.py' +Write-Host ('Verifying sha256 ({0}) ...' -f $Env:PYTHON_GET_PIP_SHA256) +if ((Get-FileHash 'get-pip.py' -Algorithm sha256).Hash -ne $Env:PYTHON_GET_PIP_SHA256) { + Write-Host 'FAILED!' + exit 1 +} + +$Env:PYTHONDONTWRITEBYTECODE = '1' + +Write-Host ('Installing pip=={0} ...' -f $Env:PYTHON_PIP_VERSION) +python get-pip.py ` + --disable-pip-version-check ` + --no-cache-dir ` + --no-compile ` +('pip=={0}' -f $Env:PYTHON_PIP_VERSION) ` +('setuptools=={0}' -f $Env:PYTHON_SETUPTOOLS_VERSION) + +Remove-Item get-pip.py -Force + +Write-Host 'Verifying pip install ...' +pip --version + +Write-Host 'Complete.' diff --git a/dev-support/docker/pkg-resolver/install-python.ps1 b/dev-support/docker/pkg-resolver/install-python.ps1 new file mode 100644 index 0000000000..7c866104a7 --- /dev/null +++ b/dev-support/docker/pkg-resolver/install-python.ps1 @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# The code lines below are derived from - +# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile + +$url = ('https://www.python.org/ftp/python/{0}/python-{1}-amd64.exe' -f ($Env:PYTHON_VERSION -replace '[a-z]+[0-9]*$', ''), $Env:PYTHON_VERSION) +Write-Host ('Downloading {0} ...' -f $url) +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 +Invoke-WebRequest -Uri $url -OutFile 'python.exe' + +Write-Host 'Installing ...' +$exitCode = (Start-Process python.exe -Wait -NoNewWindow -PassThru ` + -ArgumentList @( + '/quiet', + 'InstallAllUsers=1', + 'TargetDir=C:\Python', + 'PrependPath=1', + 'Shortcuts=0', + 'Include_doc=0', + 'Include_pip=0', + 'Include_test=0' + ) +).ExitCode +if ($exitCode -ne 0) { + Write-Host ('Running python installer failed with exit code: {0}' -f $exitCode) + Get-ChildItem $Env:TEMP | Sort-Object -Descending -Property LastWriteTime | Select-Object -First 1 | Get-Content + exit $exitCode +} + +# the installer updated PATH, so we should refresh our local value +$Env:PATH = [Environment]::GetEnvironmentVariable('PATH', [EnvironmentVariableTarget]::Machine) + +Write-Host 'Verifying install ...' +Write-Host "python --version $(python --version)" + +Write-Host 'Removing ...' +Remove-Item python.exe -Force +Remove-Item $Env:TEMP\Python*.log -Force + +Write-Host 'Complete.' diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 9f6c91a36a..a7dcbb24a9 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -1095,8 +1095,9 @@ exec - ${basedir}/../../dev-support/bin/releasedocmaker + ${shell-executable} + ${basedir}/../../dev-support/bin/releasedocmaker --index --license --outputdir