100 lines
3.7 KiB
Bash
100 lines
3.7 KiB
Bash
|
#!/usr/bin/env bash
|
||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||
|
# or more contributor license agreements. See the NOTICE file
|
||
|
# distributed with this work for additional information
|
||
|
# regarding copyright ownership. The ASF licenses this file
|
||
|
# to you under the Apache License, Version 2.0 (the
|
||
|
# "License"); you may not use this file except in compliance
|
||
|
# with the License. You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
## @description  Download the nvidia-docker RPM into
## @description  ${DOWNLOAD_DIR}/nvidia-docker/ unless the file is already
## @description  there. Reads DOWNLOAD_HTTP, DOWNLOAD_DIR, NVIDIA_DOCKER_RPM
## @description  and NVIDIA_DOCKER_RPM_URL; sets the global
## @description  MY_NVIDIA_DOCKER_RPM_URL to the URL that was selected.
## @audience     public
## @stability    stable
## @return       0 if the RPM is already present or was downloaded
## @return       the exit status of wget if the download failed
function download_nvidia_docker_bin()
{
  local rpm_dir="${DOWNLOAD_DIR}/nvidia-docker"
  local rpm_file="${rpm_dir}/${NVIDIA_DOCKER_RPM}"
  local rc=0

  # Use the configured HTTP download server when DOWNLOAD_HTTP is set,
  # otherwise fall back to the URL given in NVIDIA_DOCKER_RPM_URL.
  if [[ -n "${DOWNLOAD_HTTP}" ]]; then
    MY_NVIDIA_DOCKER_RPM_URL="${DOWNLOAD_HTTP}/downloads/nvidia-docker/${NVIDIA_DOCKER_RPM}"
  else
    MY_NVIDIA_DOCKER_RPM_URL="${NVIDIA_DOCKER_RPM_URL}"
  fi

  if [[ -f "${rpm_file}" ]]; then
    echo "${rpm_file} is exist."
    return 0
  fi

  echo "download ${MY_NVIDIA_DOCKER_RPM_URL} ..."
  # Quoted so that paths/URLs containing spaces or glob characters are passed
  # to wget as single arguments.
  wget -P "${rpm_dir}/" "${MY_NVIDIA_DOCKER_RPM_URL}" || rc=$?

  if (( rc != 0 )); then
    # Drop a partially written file; otherwise the existence check above
    # would treat it as a finished download on the next run.
    rm -f -- "${rpm_file}"
  fi

  return "${rc}"
}
|
||
|
|
||
|
## @description  Install nvidia-docker from the downloaded RPM, start the
## @description  service, print its status/log, and copy the host NVIDIA
## @description  binaries and libraries into
## @description  /var/lib/nvidia-docker/volumes/nvidia_driver/<version>/.
## @description  Reads DOWNLOAD_DIR, NVIDIA_DOCKER_RPM and DOCKER_REGISTRY;
## @description  calls download_nvidia_docker_bin and get_nvidia_version.
## @audience     public
## @stability    stable
## @return       0 on completion
## @return       1 if the RPM could not be downloaded or no driver version
## @return       was detected
function install_nvidia_docker()
{
  local rpm_file="${DOWNLOAD_DIR}/nvidia-docker/${NVIDIA_DOCKER_RPM}"
  local driver_root="/var/lib/nvidia-docker/volumes/nvidia_driver"
  local nvidia_version
  local driver_dir

  # Without the RPM there is nothing to install.
  if ! download_nvidia_docker_bin; then
    echo "ERROR: failed to download ${NVIDIA_DOCKER_RPM}" >&2
    return 1
  fi

  # rpm -i also exits non-zero when the package is already installed, so a
  # failure here is only reported; aborting would break re-runs.
  if ! sudo rpm -i "${rpm_file}"; then
    echo "WARN: rpm -i ${rpm_file} returned a non-zero status" >&2
  fi

  # The checks below are informational; their exit status is not evaluated.
  echo -e "\033[32m===== Start nvidia-docker =====\033[0m"
  sudo systemctl start nvidia-docker

  echo -e "\033[32m===== Check nvidia-docker status =====\033[0m"
  systemctl status nvidia-docker

  echo -e "\033[32m===== Check nvidia-docker log =====\033[0m"
  journalctl -u nvidia-docker

  echo -e "\033[32m===== Test nvidia-docker-plugin =====\033[0m"
  curl http://localhost:3476/v1.0/docker/cli

  # create nvidia driver library path
  # NOTE(review): mkdir/cp below run without sudo, unlike rpm and
  # "systemctl start" above — presumably the script is run as root; confirm.
  if [[ ! -d "${driver_root}" ]]; then
    echo "WARN: /var/lib/nvidia-docker/volumes/nvidia_driver folder path is not exist!"
    mkdir -p "${driver_root}"
  fi

  # Declaration and assignment are separate so that "local" does not hide
  # the command substitution.
  nvidia_version=$(get_nvidia_version)
  echo -e "\033[31m nvidia detect version is ${nvidia_version}\033[0m"

  # An empty version would place bin/ and lib64/ directly under the driver
  # root instead of a per-version directory.
  if [[ -z "${nvidia_version}" ]]; then
    echo "ERROR: could not detect the nvidia driver version" >&2
    return 1
  fi

  driver_dir="${driver_root}/${nvidia_version}"

  # -p keeps the function re-runnable when the directories already exist.
  mkdir -p "${driver_dir}/bin" "${driver_dir}/lib64"

  # Globs are intentionally unquoted; only the destination is quoted.
  cp /usr/bin/nvidia* "${driver_dir}/bin"
  cp /usr/lib64/libcuda* "${driver_dir}/lib64"
  cp /usr/lib64/libnvidia* "${driver_dir}/lib64"

  echo -e "\033[32m===== Please manually execute the following command =====\033[0m"
  echo -e "\033[32mshell:> nvidia-docker run --rm ${DOCKER_REGISTRY}/nvidia/cuda:9.0-devel nvidia-smi
# If you don't see the list of graphics cards above, the NVIDIA driver installation failed. =====
\033[0m"

  echo -e "\033[32m===== Please manually execute the following command =====\033[0m"
  echo -e "\033[32m# Test with tf.test.is_gpu_available()
shell:> nvidia-docker run -it ${DOCKER_REGISTRY}/tensorflow/tensorflow:1.9.0-gpu bash
# In docker container
container:> python
python:> import tensorflow as tf
python:> tf.test.is_gpu_available()
python:> exit()
\033[0m"
}
|
||
|
|
||
|
## @description  Placeholder for removing nvidia-docker; it only reports
## @description  that uninstalling is not implemented.
## @audience     public
## @stability    stable
uninstall_nvidia_docker() {
  printf '%s\n' "This method is not implemented."
}
|
||
|
|