File: /snap/docker/3265/usr/share/nvidia-container-toolkit/lib
# NVIDIA Container Toolkit related setup functions #
U_MACHINE="$(uname -m)"
U_OS="$(uname -o)"
U_KERNEL="${U_OS##*/}"
U_USERLAND="${U_OS%%/*}"
ARCH_TRIPLET="${U_MACHINE}-${U_KERNEL,,}-${U_USERLAND,,}"
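# e.g. on an amd64 system "uname -m" gives "x86_64" and "uname -o" gives "GNU/Linux",
# so ARCH_TRIPLET expands to "x86_64-linux-gnu" #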
NVIDIA_SUPPORT_DISABLED="$(snapctl get nvidia-support.disabled)"
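# On classic systems the host driver is visible through snapd's hostfs;
# a libcuda.so symlink there marks the NVIDIA userspace driver as installed #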
if [ -L "/var/lib/snapd/hostfs/usr/lib/${ARCH_TRIPLET}/libcuda.so" ] ; then
NVIDIA_SUPPORT_CLASSIC="true"
else
NVIDIA_SUPPORT_CLASSIC="false"
fi
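# Wait for a character device to appear, polling up to TRIES times, SLEEP seconds apart #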
device_wait() {
COUNT=0
SLEEP=3
TRIES=10
echo "Waiting for device to become available: ${1}"
while [ "${COUNT}" -lt "${TRIES}" ] ; do
COUNT=$((COUNT + 1))
echo "Checking device: ${COUNT}/${TRIES}"
if [ -c "${1}" ] ; then
echo "Device found"
return 0
fi
sleep "${SLEEP}"
done
echo "Device not found"
return 1
}
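# Illustrative usage [ the device path is an example, not referenced by this file ]:
#   device_wait /dev/nvidiactl || exit 1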
# Check if hardware is present - just exit if not #
nvidia_hw_ensure() {
lspci -d 10de: | grep -q 'NVIDIA Corporation' || exit 0
echo "NVIDIA hardware detected: $(lspci -d 10de:)"
}
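# Illustrative usage: call first in a hook so machines without NVIDIA hardware exit cleanly:
#   nvidia_hw_ensure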
# Create any data dirs if missing #
ensure_nvidia_data_dirs() {
mkdir -p "${SNAP_DATA}/etc/cdi"
mkdir -p "${SNAP_DATA}/etc/nvidia-container-runtime"
}
# Generate the CDI config #
cdi_generate () {
# Allow a configured device-name-strategy, defaulting to index [ the nvidia-ctk default ] #
CDI_DEVICE_NAME_STRATEGY="$(snapctl get nvidia-support.cdi.device-name-strategy)"
CDI_DEVICE_NAME_STRATEGY="${CDI_DEVICE_NAME_STRATEGY:-index}"
# Default CDI library search path, config search path, and shell PATH for installs on core systems #
CDI_LIB_SEARCH_PATH="${SNAP}/graphics/lib/${ARCH_TRIPLET}"
CDI_CONFIG_SEARCH_PATH="${SNAP}/graphics/share"
CDI_PATH="${PATH}:${SNAP}/graphics/bin"
# Otherwise, on classic with the NVIDIA driver installed, point the CDI search paths and PATH at hostfs #
if [ "${NVIDIA_SUPPORT_CLASSIC}" == "true" ] ; then
CDI_LIB_SEARCH_PATH="/var/lib/snapd/hostfs/usr/lib/${ARCH_TRIPLET}"
CDI_CONFIG_SEARCH_PATH="/var/lib/snapd/hostfs/usr/share"
CDI_PATH="${PATH}:/var/lib/snapd/hostfs/usr/bin"
fi
# Generate the CDI spec
XDG_DATA_DIRS="${XDG_DATA_DIRS:-}:${CDI_CONFIG_SEARCH_PATH}" LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}:${CDI_LIB_SEARCH_PATH}" PATH="${CDI_PATH}" \
"${SNAP}/usr/bin/nvidia-ctk" cdi generate \
--nvidia-ctk-path "${SNAP}/usr/bin/nvidia-ctk" \
--library-search-path "${CDI_LIB_SEARCH_PATH}" \
--device-name-strategy "${CDI_DEVICE_NAME_STRATEGY}" \
--output "${SNAP_DATA}/etc/cdi/nvidia.yaml"
if [ "${NVIDIA_SUPPORT_CLASSIC}" == "true" ] ; then
# Replace container path for binaries such as nvidia-smi to make them discoverable from the default PATH
sed -i "s|containerPath: /var/lib/snapd/hostfs/usr/bin|containerPath: /usr/bin|g" "${SNAP_DATA}/etc/cdi/nvidia.yaml"
fi
}
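# The generated spec can be sanity-checked with the bundled CLI, e.g. [ illustrative;
# the cdi list subcommand and --spec-dir flag depend on the nvidia-ctk version ]:
#   "${SNAP}/usr/bin/nvidia-ctk" cdi list --spec-dir "${SNAP_DATA}/etc/cdi"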
# Create the nvidia runtime config, either snap default or custom #
nvidia_runtime_config () {
RUNTIME_CONFIG_OVERRIDE="$(snapctl get nvidia-support.runtime.config-override)"
# Custom #
if [ -n "${RUNTIME_CONFIG_OVERRIDE}" ] ; then
echo "${RUNTIME_CONFIG_OVERRIDE}" > "${SNAP_DATA}/etc/nvidia-container-runtime/config.toml"
# Default - opinionated, but most viable option for now #
else
# FIXME: CDI spec-dirs can be set as a list using `"${SNAP_DATA}/etc/cdi",/var/run/cdi`, once this is fixed: https://github.com/NVIDIA/nvidia-container-toolkit/issues/466
rm -f "${SNAP_DATA}/etc/nvidia-container-runtime/config.toml"
"${SNAP}/usr/bin/nvidia-ctk" config --in-place --set nvidia-container-runtime.mode=cdi --set nvidia-container-runtime.modes.cdi.spec-dirs="${SNAP_DATA}/etc/cdi" --config "${SNAP_DATA}/etc/nvidia-container-runtime/config.toml"
fi
}
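# With the defaults above, config.toml is expected to end up roughly like
# [ illustrative shape; the exact contents are written by nvidia-ctk ]:
#   [nvidia-container-runtime]
#     mode = "cdi"
#     [nvidia-container-runtime.modes.cdi]
#       spec-dirs = ["<SNAP_DATA>/etc/cdi"]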
# Generate the dockerd runtime config #
docker_runtime_configure () {
"${SNAP}/usr/bin/nvidia-ctk" runtime configure --runtime=docker --runtime-path "${SNAP}/usr/bin/nvidia-container-runtime" --config "${SNAP_DATA}/config/daemon.json"
}
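# This adds an nvidia entry under runtimes in daemon.json, roughly [ illustrative ]:
#   "runtimes": { "nvidia": { "path": "<SNAP>/usr/bin/nvidia-container-runtime", "args": [] } }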
# Setup failure recovery #
setup_fail () {
echo "WARNING: Conainter Toolkit setup seemed to fail with an error"
# Remove nvidia runtime config, if it exists #
jq -r 'del(.runtimes.nvidia)' "${SNAP_DATA}/config/daemon.json" > "${SNAP_DATA}/config/daemon.json.new"
# If it was removed [ there was a change ], move the new config into place and remove the CDI and runtime configs #
if ! cmp "${SNAP_DATA}/config/daemon.json"{,.new} >/dev/null ; then
mv "${SNAP_DATA}/config/daemon.json"{.new,}
rm -f "${SNAP_DATA}/etc/cdi/nvidia.yaml"
rm -f "${SNAP_DATA}/etc/nvidia-container-runtime/config.toml"
fi
}
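# Illustrative wiring from a setup hook [ hypothetical; the actual hook lives
# elsewhere in the snap ]:
#   nvidia_hw_ensure
#   ensure_nvidia_data_dirs
#   if cdi_generate && nvidia_runtime_config && docker_runtime_configure ; then
#       setup_info
#   else
#       setup_fail
#   fi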
# Info #
setup_info () {
echo "Conainter Toolkit setup complete"
}