feat: first fully working Nvidia support for KubeZero workers
This commit is contained in:
parent
622d12858a
commit
f50fa392d4
7
Makefile
7
Makefile
@ -2,6 +2,8 @@ BUILDER := v3.16.0
|
|||||||
RELEASE := v3.16
|
RELEASE := v3.16
|
||||||
PKG := '*'
|
PKG := '*'
|
||||||
|
|
||||||
|
CF_DIST := E1YFUJXMCXT2RN
|
||||||
|
|
||||||
.PHONY: builder aports_update download upload
|
.PHONY: builder aports_update download upload
|
||||||
|
|
||||||
all: build
|
all: build
|
||||||
@ -37,6 +39,9 @@ build: packages distfiles work
|
|||||||
download:
|
download:
|
||||||
aws s3 sync s3://zero-downtime-web/cdn/alpine/$(RELEASE)/kubezero/x86_64/ packages/work/x86_64/ --exclude APKINDEX.tar.gz
|
aws s3 sync s3://zero-downtime-web/cdn/alpine/$(RELEASE)/kubezero/x86_64/ packages/work/x86_64/ --exclude APKINDEX.tar.gz
|
||||||
|
|
||||||
# Both targets are commands, not files.
.PHONY: invalidate upload

# Invalidate every cached CloudFront path below /alpine/ on distribution $(CF_DIST).
invalidate:
	aws cloudfront create-invalidation --distribution-id $(CF_DIST) --paths "/alpine/*"

# Publish the locally built packages to the S3 backed CDN.
# The package files are synced first; APKINDEX.tar.gz is excluded from the sync
# and copied separately so it gets its own short cache lifetime (max-age=1).
# The CDN invalidation runs last: as a prerequisite it would execute before the
# new objects exist in the bucket.
upload:
	aws s3 sync --delete packages/work/x86_64/ s3://zero-downtime-web/cdn/alpine/$(RELEASE)/kubezero/x86_64/ --exclude APKINDEX.tar.gz
	aws s3 cp packages/work/x86_64/APKINDEX.tar.gz s3://zero-downtime-web/cdn/alpine/$(RELEASE)/kubezero/x86_64/ --cache-control max-age=1
	$(MAKE) invalidate
|
||||||
|
@ -1,58 +1,78 @@
|
|||||||
# Contributor: Stefan Reimer <stefan@zero-downtime.net>
|
# Contributor: Stefan Reimer <stefan@zero-downtime.net>
|
||||||
# Maintainer: Stefan Reimer <stefan@zero-downtime.net>
|
# Maintainer: Stefan Reimer <stefan@zero-downtime.net>
|
||||||
|
|
||||||
pkgname=nvidia-container-toolkit
|
pkgname=nvidia-container-toolkit
|
||||||
|
|
||||||
pkgver=1.10.0
|
pkgver=1.10.0
|
||||||
pkgrel=1
|
pkgrel=1
|
||||||
|
pkgdesc="NVIDIA Container toolkit incl. cri hooks"
|
||||||
|
url="https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/overview.html"
|
||||||
|
arch="x86_64"
|
||||||
|
license="Apache"
|
||||||
|
makedepends="xz"
|
||||||
|
depends="glibc-bin nvidia-drivers"
|
||||||
|
options="!check !tracedeps"
|
||||||
|
|
||||||
pkgdesc='NVIDIA container runtime toolkit'
|
_nv_ver="$pkgver"-1
|
||||||
arch='x86_64'
|
_libcap=2.25-2
|
||||||
url='https://github.com/NVIDIA/nvidia-container-toolkit'
|
_libseccomp=2.3.3-4
|
||||||
license='Apache'
|
|
||||||
|
|
||||||
makedepends='go bash'
|
source="https://nvidia.github.io/libnvidia-container/stable/debian10/amd64/libnvidia-container1_"$_nv_ver"_amd64.deb
|
||||||
#depends='libnvidia-container-tools>=1.9.0'
|
https://nvidia.github.io/libnvidia-container/stable/debian10/amd64/libnvidia-container-tools_"$_nv_ver"_amd64.deb
|
||||||
options='!lto'
|
https://nvidia.github.io/libnvidia-container/stable/debian10/amd64/nvidia-container-toolkit_"$_nv_ver"_amd64.deb
|
||||||
|
http://deb.debian.org/debian/pool/main/libc/libcap2/libcap2_"$_libcap"_amd64.deb
|
||||||
source="${pkgname}-v${pkgver}-${pkgrel}.tar.gz"::"${url}/archive/v${pkgver}.tar.gz"
|
http://deb.debian.org/debian/pool/main/libs/libseccomp/libseccomp2_"$_libseccomp"_amd64.deb
|
||||||
_srcdir="${srcdir}/${pkgname}-${pkgver}"
|
config.toml
|
||||||
|
oci-nvidia-hook.json
|
||||||
|
"
|
||||||
|
|
||||||
# Nothing is compiled here: the sources are prebuilt Debian archives that
# package() unpacks, so the build step only has to report success.
build() {
	:
}
|
||||||
|
|
||||||
# Unpack the payload (data.tar.xz) of a Debian archive into a fresh directory.
#   $1  absolute path of the .deb file
#   $2  directory to (re)create and extract into
_extract_deb() {
	local deb="$1" dest="$2"

	rm -rf "$dest"
	mkdir -p "$dest"
	# subshell: keep the caller's working directory untouched
	( cd "$dest" && ar -x "$deb" && tar xfJ data.tar.xz )
}

# Assemble the package from the prebuilt Debian archives listed in $source:
# NVIDIA binaries go to /usr/bin, the glibc linked libraries below
# /usr/glibc-compat/lib, plus our own runtime config and OCI hook.
package() {
	local glibc_lib="$pkgdir"/usr/glibc-compat/lib
	# scratch directory, recreated for every archive and removed at the end,
	# so no extracted files are left behind in the working directory
	local work="$srcdir"/deb-extract

	mkdir -p "$pkgdir"/usr/bin "$glibc_lib"/nvidia

	# libnvidia-container1
	_extract_deb "$srcdir"/libnvidia-container1_"$_nv_ver"_amd64.deb "$work"
	mv "$work"/usr/lib/x86_64-linux-gnu/* "$glibc_lib"/nvidia

	# libnvidia-container-tools
	_extract_deb "$srcdir"/libnvidia-container-tools_"$_nv_ver"_amd64.deb "$work"
	mv "$work"/usr/bin/nvidia-container-cli "$pkgdir"/usr/bin

	# nvidia-container-toolkit
	_extract_deb "$srcdir"/nvidia-container-toolkit_"$_nv_ver"_amd64.deb "$work"
	mv "$work"/usr/bin/nvidia* "$pkgdir"/usr/bin

	# Add dependencies which also need to be compiled against glibc: libcap, libseccomp
	# libcap (this archive ships its library under /lib, not /usr/lib)
	_extract_deb "$srcdir"/libcap2_"$_libcap"_amd64.deb "$work"
	mv "$work"/lib/x86_64-linux-gnu/libcap.so.* "$glibc_lib"

	# libseccomp
	_extract_deb "$srcdir"/libseccomp2_"$_libseccomp"_amd64.deb "$work"
	mv "$work"/usr/lib/x86_64-linux-gnu/libseccomp.so.* "$glibc_lib"

	rm -rf "$work"

	# Now let's patch the ELF binaries: drop their embedded RPATH.
	# NOTE(review): installing patchelf at package time needs doas and network
	# access; consider moving it to makedepends once it is available there.
	doas apk add patchelf@edge-community
	patchelf --remove-rpath "$pkgdir"/usr/bin/nvidia-container-cli
	patchelf --remove-rpath "$glibc_lib"/nvidia/libnvidia-container.so.1

	# Install our runtime config and oci hook
	install -Dm644 "$srcdir"/oci-nvidia-hook.json "$pkgdir"/usr/share/containers/oci/hooks.d/oci-nvidia-hook.json
	install -Dm644 "$srcdir"/config.toml "$pkgdir"/etc/nvidia-container-runtime/config.toml
}
|
||||||
|
|
||||||
sha512sums='
|
sha512sums='
|
||||||
4f35918811edf7ea0f24d04eba12e4670b039edd0bb29ed4f16b47c4208f8a3d4316042f5ca52a278fedd1dce549070df69807080b1a7bda4a859369ad707bf9 nvidia-container-toolkit-v1.10.0-1.tar.gz
|
23ba2aec28f01c0037bbb4812ea542589e96f6527cf49468a4a7c54ca05808cf0984a8dfe13ee3455b8c6ae8468c58590f9e1e6996927c710bcf3e545772a356 libnvidia-container1_1.10.0-1_amd64.deb
|
||||||
|
c5369c832bd91703e6e6e86a4431c2eebb2ddeaadff126174b41ed11e969dc8cc49dcab26b3ac18abb43b466a86ce76908eaa2f5e4109c689a0c3a4fa47548b5 libnvidia-container-tools_1.10.0-1_amd64.deb
|
||||||
|
3043729bd96dd153db1dc317685167f34da6b9d202134335212fb7d861532a265a59e002c86fff2664c67687f4c8bcc75913c74018930a6c68c0f2044eceacf0 nvidia-container-toolkit_1.10.0-1_amd64.deb
|
||||||
|
694a3ec64ef3056d5874ff03b889b868c294bccb16506468fdf1c289fe3aaadc2da25a5934de653af9633a5d993d2bb21491d84b3b2e2529e6b31d92c78a2228 libcap2_2.25-2_amd64.deb
|
||||||
|
5a4eaa96e6e774948889909d618a8ed44a82f649cbba11622dc7b4478098bea006995d5a5a60ca026a57b76ad866d1e2c6caebd154a26eb6bd7e15291b558057 libseccomp2_2.3.3-4_amd64.deb
|
||||||
|
040ac2e3f58549dc09e5bce0d694e4be2f6aae736014bf0ee90042646562d5f1ef1f5990eb9f2c2a2fdf504587b82f4aa0eb99d04c5d3e407670e4012e3edd4e config.toml
|
||||||
|
fe02a2749c18876eda344764026c0cabae1134cd9504d92e877000c000c99783ee7d3af16e1981c85a4e7bad0951060b88028d0576aa17bfae7d241838b86fb6 oci-nvidia-hook.json
|
||||||
'
|
'
|
||||||
|
28
v3.16/nvidia-container-toolkit/config.toml
Normal file
28
v3.16/nvidia-container-toolkit/config.toml
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
disable-require = false
|
||||||
|
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||||
|
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||||
|
|
||||||
|
[nvidia-container-cli]
|
||||||
|
#root = "/run/nvidia/driver"
|
||||||
|
#path = "/usr/bin/nvidia-container-cli"
|
||||||
|
environment = []
|
||||||
|
debug = "/var/log/nvidia-container-toolkit.log"
|
||||||
|
#ldcache = "/etc/ld.so.cache"
|
||||||
|
load-kmods = false
|
||||||
|
#no-cgroups = false
|
||||||
|
#user = "root:video"
|
||||||
|
ldconfig = "@/usr/glibc-compat/sbin/ldconfig"
|
||||||
|
|
||||||
|
[nvidia-container-runtime]
|
||||||
|
debug = "/var/log/nvidia-container-runtime.log"
|
||||||
|
#log-level = "debug"
|
||||||
|
|
||||||
|
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||||
|
# searched for matching executables unless the entry is an absolute path.
|
||||||
|
runtimes = [
|
||||||
|
"crun",
|
||||||
|
]
|
||||||
|
mode = "legacy"
|
||||||
|
|
||||||
|
[nvidia-container-runtime.modes.csv]
|
||||||
|
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
12
v3.16/nvidia-container-toolkit/oci-nvidia-hook.json
Normal file
12
v3.16/nvidia-container-toolkit/oci-nvidia-hook.json
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"version": "1.0.0",
|
||||||
|
"hook": {
|
||||||
|
"path": "/usr/bin/nvidia-container-toolkit",
|
||||||
|
"args": ["nvidia-container-toolkit", "prestart"]
|
||||||
|
},
|
||||||
|
"when": {
|
||||||
|
"always": true,
|
||||||
|
"commands": ["nvidia.*"]
|
||||||
|
},
|
||||||
|
"stages": ["prestart"]
|
||||||
|
}
|
57
v3.16/nvidia-drivers/APKBUILD
Normal file
57
v3.16/nvidia-drivers/APKBUILD
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# Contributor: Stefan Reimer <stefan@zero-downtime.net>
|
||||||
|
# Maintainer: Stefan Reimer <stefan@zero-downtime.net>
|
||||||
|
pkgname=nvidia-drivers
|
||||||
|
pkgver=515.65.01
|
||||||
|
pkgrel=0
|
||||||
|
pkgdesc="NVIDIA Driver"
|
||||||
|
url="https://www.nvidia.com/download/index.aspx"
|
||||||
|
arch="x86_64"
|
||||||
|
license="MIT OR GPL-2.0"
|
||||||
|
makedepends="bash xz"
|
||||||
|
depends="glibc-bin"
|
||||||
|
options="!check !strip !tracedeps"
|
||||||
|
|
||||||
|
source="NVIDIA-Linux-x86_64-$pkgver.run::https://download.nvidia.com/XFree86/Linux-x86_64/$pkgver/NVIDIA-Linux-x86_64-$pkgver.run"
|
||||||
|
|
||||||
|
# Unpack the NVIDIA installer so that package() finds the extracted tree at
# "$srcdir"/NVIDIA-Linux-x86_64-$pkgver.
build() {
	# The installer unpacks into a directory below the current one, so make
	# the location explicit instead of relying on the caller's cwd.
	cd "$srcdir" || return 1

	# NOTE(review): the self-extractor is expected to refuse an already
	# existing target directory - clear leftovers from a previous run.
	rm -rf NVIDIA-Linux-x86_64-$pkgver

	sh "$srcdir"/NVIDIA-Linux-x86_64-$pkgver.run -x -s
}
|
||||||
|
|
||||||
|
# Install the user space part of the driver from the extracted installer tree:
# the management tools into /usr/sbin and the driver libraries, including
# their symlinks, into /usr/glibc-compat/lib/nvidia.
package() {
	local libdir="$pkgdir"/usr/glibc-compat/lib/nvidia
	local sbins libs bin lib natives native links target

	# Install basic nvidia-smi incl. libnvidia-ml, libcuda into the glibc-compat tree
	mkdir -p "$libdir" "$pkgdir"/usr/sbin

	# tell glibc compat about our libs
	mkdir -p "$pkgdir"/usr/glibc-compat/etc/ld.so.conf.d
	echo "/usr/glibc-compat/lib/nvidia" > "$pkgdir"/usr/glibc-compat/etc/ld.so.conf.d/nvidia.conf

	cd "$srcdir"/NVIDIA-Linux-x86_64-$pkgver || return 1

	sbins="nvidia-smi nvidia-debugdump nvidia-cuda-mps-control nvidia-cuda-mps-server nvidia-persistenced"
	for bin in $sbins; do
		cp "$bin" "$pkgdir"/usr/sbin
	done

	# which libs are from debug log at runtime
	# LIBS=$(grep "missing library" /var/log/nvidia-container-toolkit.log | awk '{print $7}' | sort | uniq)
	# cross checked via .manifest for targets and symlinks
	libs="libEGL_nvidia.so libGLESv1_CM_nvidia.so libGLESv2_nvidia.so libGLX_nvidia.so libcuda.so libcudadebugger.so libnvcuvid.so libnvidia-allocator.so libnvidia-cbl.so libnvidia-cfg.so libnvidia-compiler.so libnvidia-eglcore.so libnvidia-encode.so libnvidia-fatbinaryloader.so libnvidia-fbc.so libnvidia-glcore.so libnvidia-glsi.so libnvidia-glvkspirv.so libnvidia-ifr.so libnvidia-ml.so libnvidia-ngx.so libnvidia-nscq.so libnvidia-opencl.so libnvidia-opticalflow.so libnvidia-pkcs11.so libnvidia-ptxjitcompiler.so libnvidia-rtcore.so libnvidia-tls.so libnvoptix.so libvdpau_nvidia.so"

	for lib in $libs; do
		# Manifest entries for the real (non symlink) native library files.
		# -F: the names contain dots and must be matched literally.
		natives=$(grep -F "$lib" .manifest | grep _LIB | grep NATIVE | grep -v SYMLINK | awk '{print $1}')

		# Libraries missing from this driver release yield no entry and are skipped;
		# more than one entry is handled file by file.
		for native in $natives; do
			cp "$native" "$libdir"

			# Recreate every non-COMPAT32 symlink the manifest lists for this file
			links="$(grep -F "$native" .manifest | grep SYMLINK | grep -v COMPAT32 | awk '{print $1}')"
			for target in $links; do
				ln -s /usr/glibc-compat/lib/nvidia/"$native" "$libdir"/"$target"
			done
		done
	done
}
|
||||||
|
|
||||||
|
sha512sums='
|
||||||
|
5221a4ac071eb39a37a841f19cfe4983286dc35e918956b40604404ef36c122612475df7b9a391a9a70bd60f44e598c8a0e5ec54ccc3e90d51f01e1b2fbe5e33 NVIDIA-Linux-x86_64-515.65.01.run
|
||||||
|
'
|
@ -1,6 +1,6 @@
|
|||||||
# Contributor: Stefan Reimer <stefan@zero-downtime.net>
|
# Contributor: Stefan Reimer <stefan@zero-downtime.net>
|
||||||
# Maintainer: Stefan Reimer <stefan@zero-downtime.net>
|
# Maintainer: Stefan Reimer <stefan@zero-downtime.net>
|
||||||
pkgname=nvidia-gpu-driver
|
pkgname=nvidia-open-gpu
|
||||||
pkgver=515.65.01
|
pkgver=515.65.01
|
||||||
pkgrel=0
|
pkgrel=0
|
||||||
pkgdesc="NVIDIA Linux open GPU kernel modules incl. GSP firmware"
|
pkgdesc="NVIDIA Linux open GPU kernel modules incl. GSP firmware"
|
||||||
@ -8,10 +8,11 @@ url="https://github.com/NVIDIA/open-gpu-kernel-modules"
|
|||||||
arch="x86_64"
|
arch="x86_64"
|
||||||
license="MIT OR GPL-2.0"
|
license="MIT OR GPL-2.0"
|
||||||
makedepends="bash linux-headers linux-virt-dev xz"
|
makedepends="bash linux-headers linux-virt-dev xz"
|
||||||
options="!check !strip !tracedeps lib64"
|
# No test suite is run and the binaries are left unstripped.
options="!check !strip"
|
||||||
|
|
||||||
source="nvidia-$pkgver.tar.gz::https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/$pkgver.tar.gz
|
source="nvidia-$pkgver.tar.gz::https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/$pkgver.tar.gz
|
||||||
NVIDIA-Linux-x86_64-$pkgver.run::https://download.nvidia.com/XFree86/Linux-x86_64/$pkgver/NVIDIA-Linux-x86_64-$pkgver.run"
|
NVIDIA-Linux-x86_64-$pkgver.run::https://download.nvidia.com/XFree86/Linux-x86_64/$pkgver/NVIDIA-Linux-x86_64-$pkgver.run"
|
||||||
|
|
||||||
builddir="$srcdir/open-gpu-kernel-modules-$pkgver"
|
builddir="$srcdir/open-gpu-kernel-modules-$pkgver"
|
||||||
|
|
||||||
build() {
|
build() {
|
||||||
@ -23,7 +24,7 @@ build() {
|
|||||||
|
|
||||||
package() {
|
package() {
|
||||||
KERNEL_VERSION=$(basename $(ls -d /lib/modules/*-virt))
|
KERNEL_VERSION=$(basename $(ls -d /lib/modules/*-virt))
|
||||||
depends="glibc-bin linux-virt=~$(echo $KERNEL_VERSION | sed -e 's/-.*$//')"
|
depends="linux-virt=~$(echo $KERNEL_VERSION | sed -e 's/-.*$//')"
|
||||||
|
|
||||||
modules="nvidia.ko \
|
modules="nvidia.ko \
|
||||||
nvidia-drm.ko \
|
nvidia-drm.ko \
|
||||||
@ -39,14 +40,6 @@ package() {
|
|||||||
# Add gsp firmware see: https://download.nvidia.com/XFree86/Linux-x86_64/$pkgver/README/gsp.html
|
# Add gsp firmware see: https://download.nvidia.com/XFree86/Linux-x86_64/$pkgver/README/gsp.html
|
||||||
mkdir -p "$pkgdir"/lib/firmware/nvidia/"$pkgver"
|
mkdir -p "$pkgdir"/lib/firmware/nvidia/"$pkgver"
|
||||||
sh "$srcdir"/NVIDIA-Linux-x86_64-$pkgver.run -x -s && cp NVIDIA-Linux-x86_64-$pkgver/firmware/gsp.bin "$pkgdir"/lib/firmware/nvidia/"$pkgver"/gsp.bin
|
sh "$srcdir"/NVIDIA-Linux-x86_64-$pkgver.run -x -s && cp NVIDIA-Linux-x86_64-$pkgver/firmware/gsp.bin "$pkgdir"/lib/firmware/nvidia/"$pkgver"/gsp.bin
|
||||||
|
|
||||||
#/bin/bash
|
|
||||||
|
|
||||||
# Install basic nvidia-smi inck. libnvidia-ml, libcuda into /lib64 as this is hardcoded in the nvidia-container-toolkit bins
|
|
||||||
mkdir -p "$pkgdir"/lib64 "$pkgdir"/usr/sbin
|
|
||||||
cp NVIDIA-Linux-x86_64-$pkgver/libnvidia-ml.so."$pkgver" "$pkgdir"/lib64/libnvidia-ml.so.1
|
|
||||||
cp NVIDIA-Linux-x86_64-$pkgver/libcuda.so."$pkgver" "$pkgdir"/lib64/libcuda.so.1
|
|
||||||
cp NVIDIA-Linux-x86_64-$pkgver/nvidia-smi "$pkgdir"/usr/sbin/nvidia-smi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sha512sums='
|
sha512sums='
|
Loading…
Reference in New Issue
Block a user