From de8559fec27867150cdaab512b9d38381ec4ce9a Mon Sep 17 00:00:00 2001
From: Stefan Reimer
Date: Wed, 26 Apr 2023 17:22:52 +0000
Subject: [PATCH] Alpine 3.17 rollout, KubeZero 1.25 upgrades

---
 Dockerfile                                    |   4 +-
 Makefile                                      |   2 +-
 kubezero/aws-iam-authenticator/APKBUILD       |   4 +-
 kubezero/aws-neuron-driver/APKBUILD           |  16 +-
 kubezero/cri-o/APKBUILD                       |   8 +-
 kubezero/docker-registry/APKBUILD             |  18 +--
 kubezero/ecr-credential-provider/APKBUILD     |   8 +-
 kubezero/fluent-bit/APKBUILD                  |  12 +-
 kubezero/glibc/APKBUILD                       |   5 +-
 kubezero/kubernetes/APKBUILD                  |  13 +-
 kubezero/kubernetes/kubelet.initd             |   1 +
 kubezero/kubezero/APKBUILD                    |  21 ++-
 kubezero/kubezero/evictLocalNode.sh           |  52 +++++++
 .../etc/nvidia-container-runtime/config.toml  |  32 ++++
 kubezero/nvidia-open-gpu/APKBUILD             |   2 +-
 kubezero/zdt-base/APKBUILD                    |  62 +++++---
 kubezero/zdt-base/cb-volumes.startstop        |  18 ---
 kubezero/zdt-base/cb_base.sh                  | 147 ++++++++++++++++++
 kubezero/zdt-base/cb_init.sh                  | 108 +++++++++++++
 kubezero/zdt-base/cb_lock.sh                  |  67 ++++++++
 .../zdt-base/{lib-base.sh => cb_volumes.sh}   |  22 +--
 ...-mount-var.init => cloudbender-early.init} |   4 +-
 kubezero/zdt-base/cloudbender.init            |  48 ++++++
 kubezero/zdt-base/cloudbender.stop            |  15 --
 kubezero/zdt-base/profile                     |   4 +
 kubezero/zdt-base/syslog-ng.apparmor          |   4 +
 kubezero/zdt-base/uniq_hostname.py            | 102 ++++++++++++
 kubezero/zdt-base/zdt-base.post-install       |   3 +-
 28 files changed, 675 insertions(+), 127 deletions(-)
 create mode 100644 kubezero/kubezero/evictLocalNode.sh
 create mode 100644 kubezero/nvidia-container-toolkit/etc/nvidia-container-runtime/config.toml
 delete mode 100755 kubezero/zdt-base/cb-volumes.startstop
 create mode 100644 kubezero/zdt-base/cb_base.sh
 create mode 100644 kubezero/zdt-base/cb_init.sh
 create mode 100644 kubezero/zdt-base/cb_lock.sh
 rename kubezero/zdt-base/{lib-base.sh => cb_volumes.sh} (84%)
 rename kubezero/zdt-base/{cb-mount-var.init => cloudbender-early.init} (64%)
 create mode 100755 kubezero/zdt-base/cloudbender.init
 delete mode 100755 kubezero/zdt-base/cloudbender.stop
 create mode 100644 kubezero/zdt-base/profile
 create mode 100644 kubezero/zdt-base/syslog-ng.apparmor
 create mode 100755 kubezero/zdt-base/uniq_hostname.py

diff --git a/Dockerfile b/Dockerfile
index 97b0f5b..28550fd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
-FROM alpine:3.16
-ARG ALPINE="v3.16"
+FROM alpine:3.17
+ARG ALPINE="v3.17"
 
 RUN echo "http://dl-cdn.alpinelinux.org/alpine/${ALPINE}/main" > /etc/apk/repositories && \
     echo "http://dl-cdn.alpinelinux.org/alpine/${ALPINE}/community" >> /etc/apk/repositories && \
diff --git a/Makefile b/Makefile
index 5b1bb42..75f29b9 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ REGION := us-east-1
 
 include .ci/podman.mk
 
-BUILDER := v3.16.3
+BUILDER := v3.17.2
 PKG := '*'
 
 CF_DIST := E1YFUJXMCXT2RN
diff --git a/kubezero/aws-iam-authenticator/APKBUILD b/kubezero/aws-iam-authenticator/APKBUILD
index 3cea659..86bc2ca 100644
--- a/kubezero/aws-iam-authenticator/APKBUILD
+++ b/kubezero/aws-iam-authenticator/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Stefan Reimer
 # Maintainer: Stefan Reimer
 pkgname=aws-iam-authenticator
-pkgver=0.5.11
+pkgver=0.6.2
 pkgrel=0
 pkgdesc="AWS aws-iam-authenticator"
 url="https://github.com/kubernetes-sigs/aws-iam-authenticator"
@@ -20,5 +20,5 @@ package() {
 }
 
 sha512sums="
-bf3ea959c81fa1b94463f2bd892010697b43fddb825a55c9fd62f418fccde2df804490010ec91c050c7ad01307508e220200c9705157a656a8ae79122c4fe9e8  aws-iam-authenticator-0.5.11.tar.gz
+4789fe7c11d4d1b94da5f35844a0da8e62da743bef3fc13f668c542f3dbc83584ef29abbcebc6f4651aad8ecbd9195d6bfc13476c7dd4a1d34ed11822652fc5e  aws-iam-authenticator-0.6.2.tar.gz
 "
diff --git a/kubezero/aws-neuron-driver/APKBUILD b/kubezero/aws-neuron-driver/APKBUILD
index 391082f..a8a0e55 100644
--- a/kubezero/aws-neuron-driver/APKBUILD
+++ b/kubezero/aws-neuron-driver/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Stefan Reimer
 # Maintainer: Stefan Reimer
 pkgname=aws-neuron-driver
-pkgver=2.3.26.0
+pkgver=2.8.4.0
 pkgrel=0
 pkgdesc="Linux Kernel module for AWS Neuron INF instances"
 url="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/index.html#"
@@ -12,11 +12,11 @@ options="!check"
 
 # https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/neuron-driver.html#neuron-driver-release-notes
 # apt-get download --print-uris aws-neuron-dkms | cut -d' ' -f1
-source="$pkgname-$pkgver.deb::https://apt.repos.neuron.amazonaws.com/pool/main/a/aws-neuron-dkms/aws-neuron-dkms_"$pkgver"_amd64.deb"
+source="$pkgname-$pkgver.deb::https://apt.repos.neuron.amazonaws.com/pool/main/a/aws-neuronx-dkms/aws-neuronx-dkms_"$pkgver"_amd64.deb"
 
 unpack() {
-    ar -x "$srcdir/$pkgname-$pkgver.deb" && tar xfJo data.tar.xz
-    mv usr/src/aws-neuron-"$pkgver" "$srcdir/$pkgname-$pkgver"
+    ar -x "$srcdir/$pkgname-$pkgver.deb" && find . -type d -exec chmod 755 {} \; && tar xfJo data.tar.xz
+    mv usr/src/aws-neuronx-"$pkgver" "$srcdir/$pkgname-$pkgver"
     rm -rf usr data.tar.xz control.tar.xz debian-binary
 
     # What is wrong with AWS ?
@@ -33,7 +33,7 @@ build() {
 
 package() {
     KERNEL_VERSION=$(basename $(ls -d /lib/modules/*-virt))
-    depends="linux-virt=~$(echo $KERNEL_VERSION | sed -e 's/-.*$//')"
+    depends="linux-virt~$(echo $KERNEL_VERSION | sed -e 's/-.*$//')"
 
     mkdir -p "$pkgdir"/lib/modules/$KERNEL_VERSION/kernel "$pkgdir"/lib/udev/rules.d
     gzip -9 -c neuron.ko > "$pkgdir"/lib/modules/$KERNEL_VERSION/kernel/neuron.ko.gz
@@ -41,6 +41,6 @@ package() {
     echo 'KERNEL=="neuron*", MODE="0666"' > "$pkgdir"/lib/udev/rules.d/90-neuron-udev.rules
 }
 
-sha512sums='
-81a233daf540b7ea3f334e8934ca1a89dd2cdedd8d0a448ae080ea5f7e28ef4b0aaa00e09644b69c29b759098ce82c1cef0a57ee68380f1fe0835883b8d33ab7  aws-neuron-driver-2.3.26.0.deb
-'
+sha512sums="
+1fa536cf32fb9a0d383e73c6694ddbdee38a775a25a7d0013322c4e4b4c724d546082f88ac1c8e485e808312d7821453a9d27391e98f613431ccff0081a76483  aws-neuron-driver-2.8.4.0.deb
+"
diff --git a/kubezero/cri-o/APKBUILD b/kubezero/cri-o/APKBUILD
index 8bc7281..d0f9b67 100644
--- a/kubezero/cri-o/APKBUILD
+++ b/kubezero/cri-o/APKBUILD
@@ -3,7 +3,7 @@
 # Contributor: TBK
 # Maintainer: ungleich
 pkgname=cri-o
-pkgver=1.23.3
+pkgver=1.25.3
 pkgrel=0
 pkgdesc="OCI-based implementation of Kubernetes Container Runtime Interface"
 url="https://github.com/cri-o/cri-o/"
@@ -50,7 +50,6 @@ source="https://github.com/cri-o/cri-o/archive/v$pkgver/cri-o-$pkgver.tar.gz
     crio.logrotated
     cni-plugins-path.patch
     makefile-fix-install.patch
-    fix-test.patch
     remove-systemd-files.patch
     "
 
@@ -62,7 +61,7 @@ build() {
     export GOPATH="$srcdir"
     export GOBIN="$GOPATH/bin"
     # https://github.com/cri-o/cri-o/blob/master/install.md#build-tags
-    make BUILDTAGS="seccomp selinux containers_image_openpgp containers_image_ostree_stub"
+    make BUILDTAGS="seccomp selinux containers_image_openpgp containers_image_ostree_stub apparmor"
 }
 
 check() {
@@ -88,12 +87,11 @@ contrib_cni() {
 }
 
 sha512sums="
-7613c3d6de00c3366f69d5195a1ef7537422ac53de62615a25c292dedd4b2a10722da3055dea5a08d1f3a502b2fc187c9c5b827d5382c96b48c522b4434c2f67  cri-o-1.23.3.tar.gz
+39b162c55141af009879f600c4b6cf91b6d710392bf07783080efe195f3ece1a0ed186eeadaf3a84bbed11a376995c3fab3c951a6d7ed14bb7e85b39e7920e21  cri-o-1.25.3.tar.gz
 e026f056ed92489413e16ed7955a9dcd7d1f4df1cc28e3ea785771b44d43811fea4f5b953cc46bc0c4aeac8ad07115bfff304d7516ebd24f2e58fe782ff812c8  crio.conf
 29561e95398975748236217bbd9df64997f6e3de6c0555d007306bd0535895a648368385a13079eb7d52c06249a91980523a73b6563e86d0575d9cd9c3fa4ee9  crio.initd
 1115228546a696eeebeb6d4b3e5c3152af0c99a2559097fc5829d8b416d979c457b4b1789e0120054babf57f585d3f63cbe49949d40417ae7aab613184bf4516  crio.logrotated
 0a567dfa431ab1e53f2a351689be8d588a60cc5fcdbda403ec4f8b6ab9b1c18ad425f6c47f9a5ab1491e3a61a269dc4efa6a59e91e7521fa2b6bb165074aa8e0  cni-plugins-path.patch
 f9577aa7b1c90c6809010e9e406e65092251b6e82f6a0adbc3633290aa35f2a21895e1a8b6ba4b6375dcad3e02629b49a34ab16387e1c36eeb32c8f4dac74706  makefile-fix-install.patch
-26ed10b478feb19cb11f5916b24301943f9e316fdd62d53ec310bb05ffcf4213ceece1340d2486461557abb04074e85002b11b6347fddaaa45ad7439e907a5a7  fix-test.patch
 78c150f87027de489289596371dce0465159ced0758776b445deb58990e099de9c654406183c9da3cc909878b24d28db62121b7056cd180a6f2820e79e165cc6  remove-systemd-files.patch
 "
diff --git a/kubezero/docker-registry/APKBUILD b/kubezero/docker-registry/APKBUILD
index 2ddddd9..d46696a 100644
--- a/kubezero/docker-registry/APKBUILD
+++ b/kubezero/docker-registry/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Christian Kampka
 # Maintainer:
 pkgname=docker-registry
-pkgver=2.9.0_git20220818
+pkgver=2.9.0_git20230327
 pkgrel=1
 pkgdesc="An implementation of the Docker Registry HTTP API V2 for use with docker 1.6+"
 url="https://github.com/distribution/distribution"
@@ -14,7 +14,7 @@ pkgusers="docker-registry"
 pkggroups="docker-registry"
 subpackages="$pkgname-openrc"
 #source="$pkgname-$pkgver.tar.gz::$url/archive/v$pkgver.tar.gz
-source="$pkgname-$pkgver.tar.gz::$url/archive/6c237953cbbe9ae855e483c59d9085fb1c8aa01b.tar.gz
+source="$pkgname-$pkgver.tar.gz::$url/archive/0c958010ace2e0c2a87f1bf9915b7c74157dfb62.tar.gz
     docker-registry.initd
     config-example.patch"
 builddir="$srcdir/src/github.com/docker/distribution"
@@ -23,16 +23,16 @@ options="chmod-clean"
 
 prepare() {
     mkdir -p "${builddir%/*}"
     #mv "$srcdir"/distribution-$pkgver "$builddir"
-    mv "$srcdir"/distribution-6c237953cbbe9ae855e483c59d9085fb1c8aa01b "$builddir"
+    mv "$srcdir"/distribution-0c958010ace2e0c2a87f1bf9915b7c74157dfb62 "$builddir"
 }
 
 build() {
     make binaries \
-    DISTRIBUTION_DIR="$builddir" \
-    VERSION="$pkgver" \
-    REVISION="$pkgrel" \
-    GOFLAGS="-buildmode=pie" \
-    GOLDFLAGS="-extldflags=-static -w -s"
+        DISTRIBUTION_DIR="$builddir" \
+        VERSION="$pkgver" \
+        REVISION="$pkgrel" \
+        GOFLAGS="-buildmode=pie" \
+        GOLDFLAGS="-extldflags=-static -w -s"
 }
 
 check() {
@@ -57,7 +57,7 @@ package() {
 }
 
 sha512sums="
-78cc813422eda98495592c6b6c005e90d4beb922b7d27c10308c62bb4fce4b1fc804663ba3e0bccc043b9d69893d83ce40f6b33351c3f854c874627356377a17  docker-registry-2.9.0_git20220818.tar.gz
+baf540b81d5f736e105eb2c05f5f4775c61ace3118f965a52b7b477a596291e12b33e56f882ce364731e9701ae6e9b2e09add3bcf8a1a11bb25eb54833c14368  docker-registry-2.9.0_git20230327.tar.gz
 96100a4de311afa19d293a3b8a63105e1fcdf49258aa8b1752befd389e6b4a2b1f70711341ea011b450d4468bd37dbd07a393ffab3b9aa1b2213cf0fdd915904  docker-registry.initd
 5a38f4d3f0ee5cd00c0a5ced744eb5b29b839da5921adea26c5de3eb88b6b2626a7ba29b1ab931e5f8fbfafbed8c94cb972a58737ec0c0a69cf515c32139e387  config-example.patch
 "
diff --git a/kubezero/ecr-credential-provider/APKBUILD b/kubezero/ecr-credential-provider/APKBUILD
index 14d7e66..2aa9cda 100644
--- a/kubezero/ecr-credential-provider/APKBUILD
+++ b/kubezero/ecr-credential-provider/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Stefan Reimer
 # Maintainer: Stefan Reimer
 pkgname=ecr-credential-provider
-pkgver=1.23.2
+pkgver=1.25.3
 pkgrel=0
 pkgdesc="AWS Kubernetes ecr-credential-provider"
 url="https://github.com/kubernetes/cloud-provider-aws"
@@ -23,6 +23,6 @@ package() {
     install -Dm755 "$builddir/ecr-credential-provider" "$pkgdir"/usr/libexec/kubernetes/kubelet-plugins
 }
 
-sha512sums='
-fe35267640981bb297086817ac9fb4a6279149ee538db09ef2b7785ecc7f8428ec34dd5ebac41989a2533b1f96225c7dd39cb76002f7e6e3b3a870a01d42a3c3  ecr-credential-provider-1.23.2.tar.gz
-'
+sha512sums="
+d727c01ea98608b0b51edc2bfe892218b55eee7148e358e18387f3f4a52ad765f8d0ee372884e36f95f1303c13dbeba81926f7560c325a8d3c258da11cdfc24b  ecr-credential-provider-1.25.3.tar.gz
+"
diff --git a/kubezero/fluent-bit/APKBUILD b/kubezero/fluent-bit/APKBUILD
index a79acbc..8669014 100644
--- a/kubezero/fluent-bit/APKBUILD
+++ b/kubezero/fluent-bit/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Stefan Reimer
 # Maintainer: Stefan Reimer
 pkgname=fluent-bit
-pkgver=1.9.9
+pkgver=2.1.1
 pkgrel=0
 pkgdesc="Fast and Lightweight Log processor and forwarder"
 url="https://fluentbit.io/"
@@ -12,7 +12,9 @@ makedepends="
     bison
     cmake
     flex
-    fts-dev
+    linux-headers
+    musl-fts-dev
+    openssl-dev
     gtest-dev
     yaml-dev
     zlib-dev
@@ -83,9 +85,9 @@ package() {
     mv "$pkgdir"/usr/etc/* "$pkgdir"/etc
 }
 
-sha512sums='
-95ff8353153cc1a45bc17c834633f57ad1dc4805351c59172c34c853a47b7fb0484d4dd6aae016a83b5c3324f4d0704143a1e63a2f91b4d18d086adac4d7740b  fluent-bit-1.9.9.tar.gz
+sha512sums="
+8c682e41411cae42580636a8d55b3f6c09b729f8e25f7d4e4b306ff286e0aea91da9ebc1a57dee153a90117884cc2a9d4342cae0e860a2f5f74a8a8c4f3b1e81  fluent-bit-2.1.1.tar.gz
 f6431397c80a036980b5377b51e38aec25dfceeb8dbe4cd54dce1f6e77d669d9f8daf983fcc96d25332385888f1809ced5e8ab0e8ccfcd93d19494036e3dc949  fluent-bit.confd
 8ba6c8e84dee90176f9b4375fb2c6444fa5d32fa601d9bcf3ea7960fec87f1ef664f175caf08bd0b052843e971efdbf08e2a5cd180ad9a8f23ff2c5cb233814f  fluent-bit.initd
 6bd7d8b4da93a17f29b6ea1e0286ea226d0e376024284741110936779b3229bd8d6cd03ffbdc5d3b4842294e7f32a888de0dd16b0851b65d91b062ca58530ea0  chunkio-static-lib-fts.patch
-'
+"
diff --git a/kubezero/glibc/APKBUILD b/kubezero/glibc/APKBUILD
index 0675963..334b1a6 100644
--- a/kubezero/glibc/APKBUILD
+++ b/kubezero/glibc/APKBUILD
@@ -3,13 +3,12 @@
 pkgname="glibc"
 pkgver="2.35"
 _pkgrel="0"
-pkgrel="0"
+pkgrel="1"
 pkgdesc="GNU C Library compatibility layer"
 arch="x86_64"
 url="https://github.com/sgerrand/alpine-pkg-glibc"
 license="LGPL"
 source="https://github.com/sgerrand/docker-glibc-builder/releases/download/$pkgver-$_pkgrel/glibc-bin-$pkgver-$_pkgrel-x86_64.tar.gz
-nsswitch.conf
 ld.so.conf"
 subpackages="$pkgname-bin $pkgname-dev $pkgname-i18n"
 triggers="$pkgname-bin.trigger=/lib:/usr/lib:/usr/glibc-compat/lib:/lib64"
@@ -20,7 +19,6 @@ package() {
     mkdir -p "$pkgdir/lib" "$pkgdir/lib64" "$pkgdir/usr/glibc-compat/lib/locale" "$pkgdir"/usr/glibc-compat/lib64 "$pkgdir"/etc
     cp -a "$srcdir"/usr "$pkgdir"
     cp "$srcdir"/ld.so.conf "$pkgdir"/usr/glibc-compat/etc/ld.so.conf
-    cp "$srcdir"/nsswitch.conf "$pkgdir"/etc/nsswitch.conf
     rm "$pkgdir"/usr/glibc-compat/etc/rpc
     rm -rf "$pkgdir"/usr/glibc-compat/bin
     rm -rf "$pkgdir"/usr/glibc-compat/sbin
@@ -51,6 +49,5 @@ i18n() {
 
 sha512sums="
 0aff0ec76f4d341957a792b8635c0770148eba9a5cb64f9bbd85228c14d9cb93c1a402063cab533a9f536f5f7be92c27bc5be8ed13c2b4f7aa416510c754d071  glibc-bin-2.35-0-x86_64.tar.gz
-478bdd9f7da9e6453cca91ce0bd20eec031e7424e967696eb3947e3f21aa86067aaf614784b89a117279d8a939174498210eaaa2f277d3942d1ca7b4809d4b7e  nsswitch.conf
 35f2c9e6cbada640b7c1b50e5d55d7f789864f8b12e419ed966422d99d911dedff96f63c6b3f7ab07b7434aedcd1bd4d45807d5e14f738053d169d11a88bfa53  ld.so.conf
 "
diff --git a/kubezero/kubernetes/APKBUILD b/kubezero/kubernetes/APKBUILD
index 26dbedc..afde254 100644
--- a/kubezero/kubernetes/APKBUILD
+++ b/kubezero/kubernetes/APKBUILD
@@ -5,7 +5,7 @@
 # Contributor: Dave
 # Maintainer: Stefan Reimer
 pkgname=kubernetes
-pkgver=1.24.7
+pkgver=1.25.8
 pkgrel=0
 pkgdesc="Container Cluster Manager"
 url="https://kubernetes.io/"
@@ -69,10 +69,14 @@ _agent="kubelet"
 _cli="kubeadm kubectl"
 _services="kube-apiserver kube-controller-manager kube-proxy kube-scheduler"
 
+export GOCACHE="${GOCACHE:-"$srcdir/go-cache"}"
+export GOTMPDIR="${GOTMPDIR:-"$srcdir"}"
+export GOMODCACHE="${GOMODCACHE:-"$srcdir/go"}"
+
 build() {
     make generated_files
     for _pkgs in $_agent $_cli $_services ; do
-        make GOFLAGS="-buildmode=pie -v -tags=providerless" GOLDFLAGS="-extldflags=-static -w -s" WHAT=cmd/$_pkgs
+        make -j1 GOFLAGS="-buildmode=pie -v -tags=providerless" GOLDFLAGS="-extldflags=-static" WHAT=cmd/$_pkgs
     done
 }
 
@@ -83,7 +87,6 @@ package() {
     mkdir -p "$pkgdir"/etc/kubernetes
 }
 
-
 _do_subpkg() {
     local _pkg=$1
     pkgdesc="Kubernetes - $_pkg"
@@ -204,7 +207,7 @@ _do_zshcomp() {
 }
 
 sha512sums="
-35612859f7b62c027b6edfb6f91ab81af97c711cf0648dbd6bf69665b7c480fa9c0b3f012709401b1a78fef761cee4455a4172f8545b33147911140943636446  kubernetes-1.24.7.tar.gz
+30f1815de4d5bf3a091f1937c94e0e6cf0abc0f527488b72ec4a7d72c014bb8fef450abbf4c908f8a5b791e8d4ab501edb3f5c55af4e370359a952a6228362be  kubernetes-1.25.8.tar.gz
 c350bb0a63ada0cc3657fe07598101775243083aa1eabda898080b7b01b129e6fdd7ad1a61950cc039b73b081f38de3b856baedf5c075f39916be1547b11d184  make-e2e_node-run-over-distro-bins.patch
 56201491d2dfe3a487931cbf5c6e60af898701b9541a936d80e3823948fcfb98508e3d51f4aaa415ce971f7bd20a7b51f74f025c76b83f58d5a8de8ce0ab679b  make-test-cmd-run-over-hyperkube-based-kubectl.patch
 e690daff2adb1013c92124f32e71f8ed9a18c611ae6ae5fcb5ce9674768dbf9d911a05d7e4028488cda886e63b82e8ac0606d14389a05844c1b5538a33dd09d1  kube-apiserver.initd
@@ -219,7 +222,7 @@
 d7e022ee22da191bda7382f87cb293d9c9d115a3df0c2054bf918279eb866f99c6d5c21e4c98eae8
 561bef5633ba4b9021720624443d9c279a561e5fabea76e5d0fbee2e7ad8999029a2511a45895fbec8448026212a3c5b4c197b248a6afa7f8bd945f705524ea7  kube-scheduler.initd
 af88b382ab75657d0ff13c3f8f6d924cef9f2df7807a9a27daa63495981801bc4b607998f65c0758c11a7e070e43c24f7184ba7720711109c74b1c4d57919e34  kube-scheduler.confd
 3692da349dd6ed0f5acc09d7b95ac562ffecb103e2270bebdfe4a7808d48dada9d2debff262d85b11c47f9ca3f0c20000712d03629ed813ff08a3e02d69267e6  kube-scheduler.logrotated
-70fb5c95a02083025f38b099fa50d1a1e8893bc4141955031c4129c0d4aa20fde05c565c3f2e5b6ea31efb954673aeb8289f22eadcedeb7cb89e197898dfc65d  kubelet.initd
+1b0e0cc45666b18ecb333bf3835b282e9f72e40bf29b7a9d7b9e5e2bbbd009297be8c31d284e865af45d66434a27dee14e617e61fac0dda0242d7d0f3fc89ce8  kubelet.initd
 f79ea6dec632ca052f8cd061bf0c5f7f821c7013c048b4737752e1a41c677c020c1f9454ddabe7f9ba5d55c2a8a4718170e30049b7212a4a2dc91148a3ac7ebc  kubelet.confd
 941f4a7579dcf78da2d323ac69195e95eba6600e6fcefe9231447f11c9867a7aa57b4189ee1fefb10eab19c89665ea2e7696b539c92e99fbcde905d2ff85be58  kubelet.logrotated
 "
diff --git a/kubezero/kubernetes/kubelet.initd b/kubezero/kubernetes/kubelet.initd
index 7031f4c..d1bb2bb 100755
--- a/kubezero/kubernetes/kubelet.initd
+++ b/kubezero/kubernetes/kubelet.initd
@@ -18,4 +18,5 @@ pidfile="${KUBELET_PIDFILE:-/run/${RC_SVCNAME}.pid}"
 depend() {
     after net
     need cgroups
+    want containerd crio
 }
diff --git a/kubezero/kubezero/APKBUILD b/kubezero/kubezero/APKBUILD
index 2f19f08..94dca4d 100644
--- a/kubezero/kubezero/APKBUILD
+++ b/kubezero/kubezero/APKBUILD
@@ -1,24 +1,27 @@
 # Contributor: Stefan Reimer
 # Maintainer: Stefan Reimer
 pkgname=kubezero
-pkgver=1.24
+pkgver=1.25
 pkgrel=0
 pkgdesc="KubeZero release package"
 url="https://git.zero-downtime.net/ZeroDownTime/alpine-overlay/src/branch/master/kubezero/kubezero"
 arch="noarch"
 license="AGPL-3.0"
+# cri-o uses 1.25 to prevent the cgroup leaks, revert back to matching version with 1.25
 depends="
     cri-tools
-    cri-o=~$pkgver
-    kubelet=~$pkgver
-    kubectl=~$pkgver
-    ecr-credential-provider=~$pkgver
-    aws-iam-authenticator=~0.5.9
+    cri-o~$pkgver
+    kubelet~$pkgver
+    kubectl~$pkgver
+    ecr-credential-provider~$pkgver
+    aws-iam-authenticator~0.6.2
     "
 options="!check"
+#install="$pkgname.post-install"
 source="
     shared-sys-fs.start
+    evictLocalNode.sh
     "
 
 build() {
@@ -26,10 +29,14 @@ build() {
 }
 
 package() {
-    # core library
+    # make /sys shared
     install -Dm755 "$srcdir"/shared-sys-fs.start "$pkgdir/etc/local.d/shared-sys-fs.start"
+
+    # drain local node
+    install -Dm755 "$srcdir"/evictLocalNode.sh "$pkgdir/usr/bin/evictLocalNode.sh"
 }
 
 sha512sums="
 b0cadf577ea912630efabf8d104f2edaa79bd1697a1f9224ce8a75354dd204196c6d3c15c0318afa44be10be9696ce20ef0015198ee0b74050897d164f77ae60  shared-sys-fs.start
+fce1013f7b1bfa8ee526de62e642a37fda3168889723e873d3fb69e257f4caa1423b5a14b9343b12a87f3b6f93c7d3861b854efda67ef2d6a42a5ca8cf3d1593  evictLocalNode.sh
 "
diff --git a/kubezero/kubezero/evictLocalNode.sh b/kubezero/kubezero/evictLocalNode.sh
new file mode 100644
index 0000000..a989f59
--- /dev/null
+++ b/kubezero/kubezero/evictLocalNode.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Get api server and node name for kubelet context
+API_SERVER=$(kubectl --kubeconfig /root/.kube/config config view -o json | jq -r '.clusters[].cluster.server')
+NODE_NAME=$(kubectl --kubeconfig /root/.kube/config config view -o json | jq -r '.["current-context"]' | sed -e 's/system:node://' -e 's/@.*//')
+
+if [ -z "$API_SERVER" ] || [ -z "$NODE_NAME" ]; then
+    echo "Could not find kube context. Abort."
+    exit 1
+fi
+
+# Delete all node status as we are not allowed to cordon ourselves
+curl -s --cacert /etc/kubernetes/pki/ca.crt \
+    --cert /var/lib/kubelet/pki/kubelet-client-current.pem \
+    -H "Content-Type: application/json-patch+json" -X PATCH \
+    $API_SERVER/api/v1/nodes/$NODE_NAME/status \
+    --data '[ { "op": "replace", "path": "/status/conditions", "value": []}]' >/dev/null
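+
+# Note: the kubelet's own client cert may update its node status but not spec.unschedulable,
+# hence we blank the status conditions here instead of performing a real cordon.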
+
+# Loop through all local pods
+EVICTED=""
+while read NAMESPACE NAME; do
+    # get pod owner
+    OWNER=$(curl -s --cacert /etc/kubernetes/pki/ca.crt \
+        --cert /var/lib/kubelet/pki/kubelet-client-current.pem \
+        -H 'Content-type: application/json' \
+        "$API_SERVER"/api/v1/namespaces/"$NAMESPACE"/pods/"$NAME" | jq -r '.metadata.ownerReferences[].kind')
+
+    [ -n "$OWNER" ] || continue
+
+    # skip over DS and static manifests
+    [[ "$OWNER" =~ (DaemonSet|Node) ]] && continue
+
+    JSON='{ "apiVersion": "policy/v1", "kind": "Eviction", "metadata": { "name": "'$NAME'", "namespace": "'$NAMESPACE'" } }'
+    HTTP_CODE=$(curl -o /dev/null -s -w "%{http_code}\n" --cacert /etc/kubernetes/pki/ca.crt \
+        --cert /var/lib/kubelet/pki/kubelet-client-current.pem \
+        -X POST -H 'Content-type: application/json' \
+        --data-raw "$JSON" \
+        "$API_SERVER"/api/v1/namespaces/"$NAMESPACE"/pods/"$NAME"/eviction)
+
+    if [ "$HTTP_CODE" = "201" ]; then
+        echo "Evicted $NAMESPACE/$NAME"
+        EVICTED="$EVICTED $NAME"
+    else
+        echo "Error trying to evict $NAMESPACE/$NAME"
+    fi
+done < <(crictl pods -o json | jq -r '.items[].metadata | {name,namespace} | .namespace + " " + .name')
+
+# Stop all successfully evicted pods in parallel and wait till all stopped
+for name in $EVICTED; do
+    crictl stopp $(crictl pods -o json --name $name | jq -r '.items[].id') &
+done
+wait
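+
+# The Eviction API honors PodDisruptionBudgets: a blocked eviction returns HTTP 429 instead
+# of 201 and is only reported as an error above, leaving that pod running.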
diff --git a/kubezero/nvidia-container-toolkit/etc/nvidia-container-runtime/config.toml b/kubezero/nvidia-container-toolkit/etc/nvidia-container-runtime/config.toml
new file mode 100644
index 0000000..225ec45
--- /dev/null
+++ b/kubezero/nvidia-container-toolkit/etc/nvidia-container-runtime/config.toml
@@ -0,0 +1,32 @@
+disable-require = false
+#swarm-resource = "DOCKER_RESOURCE_GPU"
+#accept-nvidia-visible-devices-envvar-when-unprivileged = true
+#accept-nvidia-visible-devices-as-volume-mounts = false
+
+[nvidia-container-cli]
+#root = "/run/nvidia/driver"
+#path = "/usr/bin/nvidia-container-cli"
+environment = []
+#debug = "/var/log/nvidia-container-toolkit.log"
+#ldcache = "/etc/ld.so.cache"
+load-kmods = true
+#no-cgroups = false
+#user = "root:video"
+ldconfig = "@/sbin/ldconfig"
+
+[nvidia-container-runtime]
+#debug = "/var/log/nvidia-container-runtime.log"
+log-level = "info"
+
+# Specify the runtimes to consider. This list is processed in order and the PATH
+# searched for matching executables unless the entry is an absolute path.
+runtimes = [
+    "docker-runc",
+    "runc",
+]
+
+mode = "auto"
+
+    [nvidia-container-runtime.modes.csv]
+
+    mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
diff --git a/kubezero/nvidia-open-gpu/APKBUILD b/kubezero/nvidia-open-gpu/APKBUILD
index d00cdad..0e03014 100644
--- a/kubezero/nvidia-open-gpu/APKBUILD
+++ b/kubezero/nvidia-open-gpu/APKBUILD
@@ -25,7 +25,7 @@ build() {
 
 package() {
     KERNEL_VERSION=$(basename $(ls -d /lib/modules/*-virt))
-    depends="linux-virt=~$(echo $KERNEL_VERSION | sed -e 's/-.*$//')"
+    depends="linux-virt~$(echo $KERNEL_VERSION | sed -e 's/-.*$//')"
 
     modules="nvidia.ko \
         nvidia-drm.ko \
diff --git a/kubezero/zdt-base/APKBUILD b/kubezero/zdt-base/APKBUILD
index e81ea3d..84e8fc0 100644
--- a/kubezero/zdt-base/APKBUILD
+++ b/kubezero/zdt-base/APKBUILD
@@ -1,33 +1,38 @@
 # Contributor: Stefan Reimer
 # Maintainer: Stefan Reimer
 pkgname=zdt-base
-pkgver=0.0.1
+pkgver=0.1.2
 pkgrel=0
 pkgdesc="ZeroDownTime Alpine additions and customizations"
 url="https://git.zero-downtime.net/ZeroDownTime/alpine-overlay/src/branch/master/kubezero/zdt-base"
 arch="noarch"
 license="AGPL-3.0"
-depends="logrotate syslog-ng neofetch monit file"
+depends="logrotate syslog-ng neofetch monit file tiny-cloud"
 options="!check"
 subpackages="$pkgname-openrc $pkgname-aws"
 install="$pkgname.post-install"
 source="
-    lib-base.sh
-    cb-mount-var.init
-    cb-volumes.startstop
+    cb_base.sh
+    cb_lock.sh
+    cb_volumes.sh
+    cb_init.sh
+    cloudbender-early.init
+    cloudbender.init
     zdt-sysctl.conf
     https://raw.githubusercontent.com/pixelb/ps_mem/v3.14/ps_mem.py
     syslog-ng.conf
     syslog-ng.logrotate.conf
+    syslog-ng.apparmor
     monitrc
    monit_alert.sh.aws
     neofetch.conf
     zdt-ascii.txt
     dhclient.conf
-    cloudbender.stop
+    profile
     route53.py
     get_iam_sshkeys.py
+    uniq_hostname.py
     "
 
 build() {
@@ -35,8 +40,15 @@ build() {
 }
 
 package() {
-    # core library
-    install -Dm755 "$srcdir/lib-base.sh" "$pkgdir/usr/lib/cloudbender/base.sh"
+    # core libraries
+    install -Dm755 "$srcdir/cb_base.sh" "$pkgdir/usr/lib/cloudbender/base.sh"
+    install -Dm755 "$srcdir/cb_lock.sh" "$pkgdir/usr/lib/cloudbender/lock.sh"
+    install -Dm755 "$srcdir/cb_volumes.sh" "$pkgdir/usr/lib/cloudbender/volumes.sh"
+    install -Dm755 "$srcdir/cb_init.sh" "$pkgdir/usr/lib/cloudbender/init.sh"
+
+    # convenience
+    mkdir -p "$pkgdir"/etc/cloudbender "$pkgdir/home/alpine"
+    install -Dm644 "$srcdir/profile" "$pkgdir/home/alpine/.profile"
 
     # dhcp tuning for MTU
     install -Dm644 "$srcdir"/dhclient.conf "$pkgdir"/etc/dhcp/dhclient.conf
@@ -44,18 +56,16 @@ package() {
     # various sysctl tunings
     install -Dm644 "$srcdir"/zdt-sysctl.conf "$pkgdir"/etc/sysctl.d/60-zdt.conf
 
-    # init script to mount var as early as possible, cannot use any network !
-    install -Dm755 "$srcdir/cb-mount-var.init" "$pkgdir/etc/init.d/cb-mount-var"
+    # early init script to eg. mount var, cannot use any network !
+    install -Dm755 "$srcdir/cloudbender-early.init" "$pkgdir/etc/init.d/cloudbender-early"
 
-    # ensure "local" init script runs before user-data
-    mkdir -p "$pkgdir/etc/conf.d"
-    echo 'rc_before="tiny-cloud-final"' > "$pkgdir/etc/conf.d/local"
-    install -Dm755 "$srcdir/cb-volumes.startstop" "$pkgdir/etc/local.d/cb-volumes.start"
-    ( cd $pkgdir/etc/local.d; ln -s cb-volumes.start cb-volumes.stop; )
+    # various tasks during boot
+    install -Dm755 "$srcdir/cloudbender.init" "$pkgdir/etc/init.d/cloudbender"
 
     # syslog-ng configs, json all into messages
     install -Dm644 "$srcdir"/syslog-ng.conf "$pkgdir"/lib/zdt/syslog-ng.conf
     install -Dm644 "$srcdir"/syslog-ng.logrotate.conf "$pkgdir"/lib/zdt/syslog-ng.logrotate.conf
+    install -Dm644 "$srcdir"/syslog-ng.apparmor "$pkgdir"/lib/zdt/syslog-ng.apparmor
 
     # monit
     mkdir -p "$pkgdir"/etc/monit.d
@@ -74,29 +84,33 @@ package() {
 aws() {
     # Basic AWS tools
     mkdir -p "$subpkgdir"
-    install -Dm755 "$srcdir"/route53.py "$subpkgdir"/usr/sbin/route53.py
-    install -Dm755 "$srcdir"/get_iam_sshkeys.py "$subpkgdir"/usr/sbin/get_iam_sshkeys.py
+    install -Dm755 "$srcdir"/route53.py "$subpkgdir"/usr/sbin/route53.py
+    install -Dm755 "$srcdir"/uniq_hostname.py "$subpkgdir"/usr/sbin/uniq_hostname.py
+    install -Dm755 "$srcdir"/get_iam_sshkeys.py "$subpkgdir"/usr/sbin/get_iam_sshkeys.py
 
-    # Cloudbender SNS integration
-    install -Dm755 "$srcdir"/cloudbender.stop "$subpkgdir"/etc/local.d/cloudbender.stop
+    # Cloudbender SNS integration
     install -Dm755 "$srcdir"/monit_alert.sh.aws "$pkgdir"/usr/bin/monit_alert.sh
-
-    mkdir -p "$subpkgdir"/etc/cloudbender/shutdown.d
 }
 
 sha512sums="
-62e5bd982d3e957ca445891b00cc9fcdc3df22414cd332321a6046ae4ee4c98f9646d3680d83a6d643f01ded229bfea6f968e5734a58a5d233ac899c92ce85da  lib-base.sh
-0d78bb09b143576b1bc582a62868236e4febed306aa9d085570e91cf9cfbc77dd379342ade9f99203d822f830bbd55d42dcba52cb934952c7b749e252fab1eb3  cb-mount-var.init
-b4fbbf55c1a4d38c2877bade1d5e2ce5f1276a6704b0bb95b025e66a7c678710a60a8d4f37cb1f136af1435657cd4ffd03709e80fb61f8950ee39520c1a47f31  cb-volumes.startstop
+92e669eb440dbc89e083e3b61c9f9fa871bedfdca404b8c3533fa1caec0d061f428c39697921c68c49d3de9af2043946d9baf989ba47482827d5626fe9f3e479  cb_base.sh
+3e02b858680d751b2c1fb22feeec2b767e328fdf007d9fb6687f309e62630467e982cc33d47c4417311938d35bb17dc210f9e0a40b90298dc22cf142f022c124  cb_lock.sh
+f392ae57c4a5ccc019eb5fcc191429a4ba81b2593bfb12bab3b3da3365342b2b89908dfd90d8debf813e3455a61ff8d123f49fa4adce8ea44c06d9db1f7b7e19  cb_volumes.sh
+89a27da781fe302aaed9ed7dcd1b8fc8b94f39802f86b1faeedee50d68a9d61fcbfe037bc29a938994c507cbcda08d86d2c387db118408ca9b7d085f99bb0a17  cb_init.sh
+9c688e08e44ae965eaa6021e005f65077690c30fe6b2de7371c57ae3a53a18f12a356587261c950b8c900f73cb35af3ba7795d33181532b485aeee3b6ca41757  cloudbender-early.init
+46500f8dc08e2e5e5d34886225ef4993f02da9f0a8b55107e886337ec1318fe683a172c398d1236e8f2559b57e0aba66238725e0a20e0440e1423d71aa3d77ea  cloudbender.init
 b9479835d8667fa99f8b1b140f969f0464a9bb3c60c7d19b57e306cfe82357d453932791e446caded71fddd379161ae8328367f1ee75ae3afc1b85e12294b621  zdt-sysctl.conf
 76e6a4f309f31bfa07de2d3b1faebe5670722752e18157b69d6e868cbe9e85eda393aed0728b0347a01a810eee442844c78259f86ff71e3136a013f4cbfaaea4  ps_mem.py
 9d087f2d4403a9c6d4d2f06fbb86519f2b8b134d8eb305facaef07c183815f917fb7bac916d39d504dbab7fdf3321a3f70954dde57e8986cc223371715bb1c54  syslog-ng.conf
 484bdcf001b71ce5feed26935db437c613c059790b99f3f5a3e788b129f3e22ba096843585309993446a88c0ab5d60fd0fa530ef3cfb6de1fd34ffc828172329  syslog-ng.logrotate.conf
+1db58e670748bf9a507068251c21c9ca1744727bbf53fd925059239a58721f6f60c0bd357e4c52e9277a913640a6784e0025a7f3834868a1e93e9e1cbc66d5a6  syslog-ng.apparmor
 b928ba547af080a07dc9063d44cb0f258d0e88e7c5a977e8f1cf1263c23608f0a138b8ffca0cdf5818ee72fccb3ce8433f877811be3107bb9c275dfff988179c  monitrc
 64944727d658ff37e7ff9d22a23869e225e104d9229756ba4fef1fc481c56f782472a1c74f8127636b4a98d4545ae30e7d35822a1f0a0fa31a59ec8eaf8c0396  monit_alert.sh.aws
 346b0170ada6cc1207ffb7b8ef138a1570a63c7df4d57618aa4b6b6c0d2df2197b0f5b23578ec83c641ee5e724865ac06985222e125809c990467426a0851b72  neofetch.conf
 532b8e2eb04942ab20bdc36b5dea1c60239fcbfcb85706123f3e05c18d65c938b85e9072d964ae5793177625a8db47b532db1f5bd5ed5ecbb70d5a331666ff54  zdt-ascii.txt
 c565516121b9e6f9d5f769511eb900546753e67cc4208d1b388fdce44cd28699261a5c3905f9a168d4b2d45ac65ac3a2a6a95335f1bbd76d2f444d5f50ec5c9e  dhclient.conf
-cd7ddd7923d45370275fa26c0f2c6dea930c6788c8f55af4388eb42309125c15e5cbb34b186ab4aebbeac3470bed0ba2db9dd46ba8796242b59092f51c5cedf5  cloudbender.stop
+c3e72cd92936b03f2b9eab5e97e9a12fcddcdf2c943342e42e7702e2d2407e00859c62dc9b4de3378688d2f05458aa5c104272af7ab13e53a62f1676d1a9a1b4  profile
 2d419d5c25a3829e99326b09876f459e48ab66f5756a8ad39b406c0f2829f5a323e8ff512afd8f32b7b07f24c88efa911bee495ce6c4d1925194cb54d3ba57bd  route53.py
 00eaff6c0a506580340b2547c3b1602a54238bac6090a15516839411478a4b4fdc138668b8ad23455445131f3a3e3fda175ed4bb0dd375402641c0e7b69c3218  get_iam_sshkeys.py
+8fd5dca9b9fdae61022f136215afa8adc0d199afcf26593bdd0bd1946d0f2efc5d7ed345704ef9642fbeedeeea007ed31b67fafe89289b749a560a045da45b8e  uniq_hostname.py
 "
diff --git a/kubezero/zdt-base/cb-volumes.startstop b/kubezero/zdt-base/cb-volumes.startstop
deleted file mode 100755
index 101af5b..0000000
--- a/kubezero/zdt-base/cb-volumes.startstop
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/sh
-# vim:set ts=8 noet ft=sh:
-#
-. /usr/lib/cloudbender/base.sh
-
-if [ "${0##*/}" = cb-volumes.start ]; then
-    get_meta_data
-    [ -z "$volumes" ] && return 0
-
-    mount_volumes "$volumes"
-
-elif [ "${0##*/}" = cb-volumes.stop ]; then
-
-    get_meta_data
-    [ -z "$volumes" ] && return 0
-
-    unmount_volumes "$volumes"
-fi
diff --git a/kubezero/zdt-base/cb_base.sh b/kubezero/zdt-base/cb_base.sh
new file mode 100644
index 0000000..1a68d7f
--- /dev/null
+++ b/kubezero/zdt-base/cb_base.sh
@@ -0,0 +1,147 @@
+#!/bin/sh
+
+function log { logger -t "user-data.${_FUNC}" -- $@; }
+
+function die { log "$@"; exit_trap 1 1 / "$@"; }
+
+# msg used for sns event, last one wins
+function msg { MSG="$@"; log "$@"; }
+
+# Generic retry command wrapper, incl. timeout of 30s
+# $1 = number of tries; 0 = forever
+# $2 = number of seconds to sleep between tries
+# $@ actual command
+retry() {
+    local tries=$1
+    local waitfor=$2
+    shift 2
+    while true; do
+        # Only use timeout if $1 is an executable, call directly if function
+        type -tf $1 >/dev/null && { timeout --preserve-status 30 $@ && return; } || { $@ && return; }
+        ((tries=tries-1))
+        [ $tries -eq 0 ] && return 1
+        sleep $waitfor
+    done
+}
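+
+# Illustrative usage: 'retry 3 10 aws s3 cp "$SRC" "$DST"' -- executables run under a 30s
+# timeout per try, while shell functions are invoked directly as timeout(1) cannot run them.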
+
+function add_swap() {
+    [ -f /.swapfile ] || { dd if=/dev/zero of=/.swapfile bs=1M count=$1 && chmod 600 /.swapfile && mkswap /.swapfile && swapon /.swapfile; }
+    grep -q "/.swapfile" /etc/fstab || echo "/.swapfile none swap sw 0 0" >> /etc/fstab
+    sysctl -w vm.swappiness=10
+}
+
+# Get SSM secure string base64 decoded
+# $0 SSM_PATH, value to stdout
+function get_secret() {
+    aws ssm get-parameter --name ${1,,} --with-decryption --query 'Parameter.Value' | base64 -d
+}
+
+# Store values as base64 on SSM
+# $0 SSM_PATH VALUE
+function put_secret() {
+    aws ssm put-parameter --name ${1,,} --type SecureString --value "$(echo "$2" | base64 -w0)" --overwrite
+}
+
+# Gets existing passphrase or creates new passphrase and stores it
+function init_passphrase() {
+    local _URL=$1
+    local _PPFILE=$2
+
+    # If secret already exists noop
+    [ -f $_PPFILE ] && return 0
+
+    get_secret $_URL > $_PPFILE && chmod 600 $_PPFILE || \
+        { xxd -l16 -p /dev/random > $_PPFILE; chmod 600 $_PPFILE; put_secret $_URL "$(cat $_PPFILE)"; }
+}
+
+function asg_heartbeat {
+    [ -n "$LAUNCH_HOOK" ] && aws autoscaling record-lifecycle-action-heartbeat --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name $AWS_AUTOSCALING_GROUPNAME || true
+}
+
+function setup_sns_alarms() {
+    # store SNS message json template
+    cat <<EOF > /etc/cloudbender/sns_alarm.json
+{
+    "Source": "CloudBender",
+    "AWSAccountId": "$AWS_ACCOUNT_ID",
+    "Region": "$REGION",
+    "Artifact": "$ARTIFACT",
+    "Asg": "$AWS_AUTOSCALING_GROUPNAME",
+    "Instance": "$INSTANCE_ID",
+    "ip": "$IP_ADDRESS"
+}
+EOF
+    mkdir -p /var/lib/cloudbender
+    cat <<EOF > /var/lib/cloudbender/sns_alarm.sh
+#!/bin/bash
+
+SUBJECT=\$1
+MSG=\$2
+LEVEL=\${3:-Info}
+ATTACHMENT=\${4:-""}
+EMOJI=\${5:-""}
+
+jq -M --arg subject "\$SUBJECT" --arg level "\$LEVEL" --arg msg "\$MSG" --arg attachment "\$ATTACHMENT" --arg emoji "\$EMOJI" --arg hostname "\$HOSTNAME" '.Subject = \$subject | .Level = \$level | .Message = \$msg | .Attachment = \$attachment | .Emoji = \$emoji | .Hostname = \$hostname' < /etc/cloudbender/sns_alarm.json | sed -e 's/\\\\\\\\/\\\\/g' > /tmp/sns.json
+aws sns publish --region ${REGION} --target-arn $ALARMSNSARN --message file:///tmp/sns.json
+EOF
+    chmod +x /var/lib/cloudbender/sns_alarm.sh
+}
+
+function exit_trap {
+    set +e
+    trap - ERR EXIT
+    local ERR_CODE=$1
+    local ERR_LINE="$2"
+    local ERR_FUNC="$3"
+    local ERR_CMD="$4"
+
+    if [ $ERR_CODE -ne 0 ]; then
+        CFN_STATUS="FAILURE"
+        RESULT="ABANDON"
+    else
+        CFN_STATUS="SUCCESS"
+        RESULT="CONTINUE"
+    fi
+
+    # Add SNS events on demand
+    if [ "x${ALARMSNSARN}" != 'x' ]; then
+        if [ $ERR_CODE -ne 0 ]; then
+            LEVEL="Error"
+            SUBJECT="Error during cloud-init."
+            if [ $ERR_LINE -ne 1 ]; then
+                MSG="$ERR_CMD failed in $ERR_FUNC at $ERR_LINE. Return: $ERR_CODE"
+                ATTACHMENT="$(pr -tn $0 | tail -n+$((ERR_LINE - 3)) | head -n7)"
+            else
+                MSG="$ERR_CMD"
+            fi
+
+            if [ -n "$DEBUG" ]; then
+                SUBJECT="$SUBJECT Instance kept running for debug."
+            else
+                SUBJECT="$SUBJECT Instance terminated by ASG lifecycle hook."
+            fi
+        else
+            LEVEL="Info"
+            SUBJECT="ZDT Alpine Instance launched."
+        fi
+
+        if [ -z "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] || [ "$LEVEL" != "Info" ]; then
+            /var/lib/cloudbender/sns_alarm.sh "$SUBJECT" "$MSG" "$LEVEL" "$ATTACHMENT"
+        fi
+
+        # Disable scaling events during shutdown
+        [ -n "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] && echo "DISABLE_SCALING_EVENTS=1" >> /etc/cloudbender/rc.conf
+    fi
+
+    [ -n "$LAUNCH_HOOK" ] && aws autoscaling complete-lifecycle-action --lifecycle-action-result $RESULT --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name ${AWS_AUTOSCALING_GROUPNAME} || true
+
+    if [ -n "${AWS_CLOUDFORMATION_LOGICAL_ID}" ]; then
+        aws cloudformation signal-resource --stack-name ${AWS_CLOUDFORMATION_STACK_NAME} --logical-resource-id ${AWS_CLOUDFORMATION_LOGICAL_ID} --unique-id ${INSTANCE_ID} --status ${CFN_STATUS}
+    fi
+
+    # timestamp being done
+    end_uptime=$(awk '{print $1}' < /proc/uptime)
+    log "Exiting user-data. Duration: $(echo "$end_uptime-$start_uptime" | bc) seconds"
+
+    exit 0
+}
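+
+# Assumed wiring (sketch, set by the user-data caller, not this file):
+#   trap 'exit_trap $? $LINENO $FUNCNAME "$BASH_COMMAND"' ERR EXIT
+# so failures report via SNS and the ASG launch hook is completed either way.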
diff --git a/kubezero/zdt-base/cb_init.sh b/kubezero/zdt-base/cb_init.sh
new file mode 100644
index 0000000..854fd75
--- /dev/null
+++ b/kubezero/zdt-base/cb_init.sh
@@ -0,0 +1,108 @@
+# We built on top of tiny-cloud
+. /etc/conf.d/tiny-cloud
+
+IMDS_ENDPOINT="169.254.169.254"
+. /lib/tiny-cloud/"$CLOUD"/imds
+
+_imds() {
+    wget --quiet --timeout 1 --output-document - \
+        --header "$(_imds_header)" \
+        "http://$IMDS_ENDPOINT/$IMDS_URI/$1$IMDS_QUERY"
+}
+
+function query_imds() {
+    MAC=$(_imds meta-data/mac)
+    AVAILABILITY_ZONE=$(_imds meta-data/placement/availability-zone)
+    REGION=$(echo ${AVAILABILITY_ZONE} | sed "s/[a-z]$//")
+    INSTANCE_ID=$(_imds meta-data/instance-id)
+
+    cat <<EOF >> /var/lib/cloud/meta-data
+AVAILABILITY_ZONE=$AVAILABILITY_ZONE
+REGION=$REGION
+INSTANCE_ID=$INSTANCE_ID
+
+# Get the internal IP of first interface
+IP_ADDRESS=$(_imds meta-data/local-ipv4)
+PUBLIC_IP_ADDRESS=$(_imds meta-data/public-ipv4 || true)
+
+MAC=$MAC
+VPC_CIDR_RANGE=$(_imds meta-data/network/interfaces/macs/${MAC}/vpc-ipv4-cidr-block)
+SUBNET=$(_imds meta-data/network/interfaces/macs/${MAC}/subnet-ipv4-cidr-block)
+
+# Make sure we have basic domain / hostname set at all time
+_META_HOSTNAME=$(_imds meta-data/hostname)
+DOMAIN_NAME=\${_META_HOSTNAME#*.}
+HOSTNAME=\${_META_HOSTNAME%%.*}
+
+AWS_ACCOUNT_ID=$(_imds meta-data/network/interfaces/macs/${MAC}/owner-id)
+INSTANCE_LIFE_CYCLE=$(_imds meta-data/instance-life-cycle)
+
+INSTANCE_TYPE=$(_imds meta-data/instance-type)
+EOF
+}
+
+function get_tags() {
+    # via metadata AWS restricts tags to NOT have " " or "/" ;-(
+    #for key in $(_imds meta-data/tags/instance); do
+    #    TAGS[$key]=$(_imds meta-data/tags/instance/"$key")
+    #done
+    # Replace all /:.- with _ for valid variable names
+    while read _key value; do
+        key=$(echo ${_key//[\/:.-]/_} | tr '[:lower:]' '[:upper:]')
+        echo "$key=\"$value\"" >> /var/lib/cloud/meta-data
+    done < <(aws ec2 describe-tags --filters "Name=resource-id,Values=${INSTANCE_ID}" --query 'Tags[*].[Key,Value]')
+}
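+# e.g. a tag 'zdt:clusterName' (name illustrative) ends up as ZDT_CLUSTERNAME="..." in meta-data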
+
+# extract user-data args and cloud meta-data into /var/lib/cloud/meta-data
+get_meta_data() {
+    if [ ! -f /var/lib/cloud/meta-data ]; then
+        ebegin "collecting metadata, instance tags and parameters"
+
+        echo '#!/bin/bash' > /var/lib/cloud/meta-data
+
+        query_imds
+
+        export AWS_DEFAULT_REGION=$REGION
+        export AWS_DEFAULT_OUTPUT=text
+
+        get_tags
+
+        [ -f /var/lib/cloud/user-data ] && bash /var/lib/cloud/user-data extract_parameters
+    fi
+
+    . /var/lib/cloud/meta-data
+
+    # Workaround for current CFN ASG_ hack
+    _key=$(echo $AWS_CLOUDFORMATION_LOGICAL_ID | tr '[:lower:]' '[:upper:]')
+    [ -n "$(eval echo \$${_key}_CUSTOMHOSTNAME)" ] && CUSTOMHOSTNAME="$(eval echo \$${_key}_CUSTOMHOSTNAME)"
+    [ -n "$(eval echo \$${_key}_VOLUMES)" ] && VOLUMES="$(eval echo \$${_key}_VOLUMES)"
+
+    return 0
+}
+
+
+################
+# IAM SSH KEYS #
+################
+cb_sshkeys() {
+    case "$CLOUD" in
+        aws)
+            # on AWS call IAM for allowed groups and actual keys
+            GROUP=${SSHKEYIAMGROUP:-""}
+            ROLE=${SSHKEYIAMROLE:-"arn:aws:iam::000000000000:role/Undefined"}
+            [ $ROLE == "arn:aws:iam::000000000000:role/Undefined" ] && ROLE=""
+
+            if [ -n "$GROUP" ]; then
+                # Configure SSHD
+                sed -i -e "s,^[\s#]*AuthorizedKeysCommand\s.*,AuthorizedKeysCommand /usr/sbin/get_iam_sshkeys.py --user %u --group $GROUP --iamRole \"$ROLE\"," /etc/ssh/sshd_config
+                sed -i -e "s,^[\s#]*AuthorizedKeysCommandUser\s.*,AuthorizedKeysCommandUser nobody," /etc/ssh/sshd_config
+
+                ebegin "added $GROUP to SSH admin keys"
+            fi
+            ;;
+        *)
+            ewarn "Unsupported Cloud: $CLOUD"
+            return 1
+            ;;
+    esac
+}
diff --git a/kubezero/zdt-base/cb_lock.sh b/kubezero/zdt-base/cb_lock.sh
new file mode 100644
index 0000000..f8e0dea
--- /dev/null
+++ b/kubezero/zdt-base/cb_lock.sh
@@ -0,0 +1,67 @@
+MUTEX=mutex
+MUTEX_OWNER=$HOSTNAME
+MUTEX_TIMEOUT=600
+
+release_lock() {
+    local S3LOCK=$1
+
+    rm -f $MUTEX
+    aws s3 rm $S3LOCK
+}
+
+# Lock not timed out and we own it: 0
+# Lock not timed out and someone else owns it: 1
+# Lock timed out: 2
+verify_lock() {
+    local S3LOCK=$1
+
+    aws s3 cp $S3LOCK $MUTEX
+    _host=$(grep "MUTEX_OWNER=" $MUTEX | sed -e 's/MUTEX_OWNER=//')
+    _time=$(grep "MUTEX_TIME=" $MUTEX | sed -e 's/MUTEX_TIME=//')
+
+    # Check for timestamp and timeout
+    let timepassed=$(date -u +%s)-$_time
+    [ $timepassed -gt $MUTEX_TIMEOUT ] && return 2
+
+    [ "$_host" == "$MUTEX_OWNER" ] && return 0
+    return 1
+}
+
+aquire_lock() {
+    local S3LOCK=$1
+
+    echo "MUTEX_OWNER=${MUTEX_OWNER}" > $MUTEX
+    echo "MUTEX_TIME=$(date -u +%s)" >> $MUTEX
+    aws s3 cp $MUTEX $S3LOCK
+
+    # verify we actually got the lock
+    sleep 2
+    verify_lock $S3LOCK
+}
+
+check_lock() {
+    local S3LOCK=$1
+
+    aws s3 ls $S3LOCK && rc=$? || rc=$?
+
+    # No LOCK ? -> get it !
+    if [ $rc -ne 0 ]; then
+        aquire_lock $S3LOCK
+
+    else
+        verify_lock $S3LOCK && rc=$? || rc=$?
+
+        # Lock timeout -> we get it
+        if [ $rc -eq 2 ]; then
+            aquire_lock $S3LOCK
+
+        # we already own it
+        elif [ $rc -eq 0 ]; then
+            return 0
+
+        # someone else has a valid lock
+        else
+            return 1
+        fi
+    fi
+}
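+
+# Typical flow (sketch, bucket/key illustrative):
+#   check_lock s3://my-bucket/env/mutex || exit 1   # holder keeps it for up to MUTEX_TIMEOUT
+#   ... critical section ...
+#   release_lock s3://my-bucket/env/mutex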
diff --git a/kubezero/zdt-base/lib-base.sh b/kubezero/zdt-base/cb_volumes.sh
similarity index 84%
rename from kubezero/zdt-base/lib-base.sh
rename to kubezero/zdt-base/cb_volumes.sh
index d60abc8..70b98c4 100644
--- a/kubezero/zdt-base/lib-base.sh
+++ b/kubezero/zdt-base/cb_volumes.sh
@@ -3,15 +3,6 @@
 # We built on top of tiny-cloud
 . /etc/conf.d/tiny-cloud
 
-# extract user-data args and cloud meta-data into /var/lib/cloud/meta-data
-get_meta_data() {
-    if [ ! -f /var/lib/cloud/meta-data ]; then
-        [ -f /var/lib/cloud/user-data ] && bash /var/lib/cloud/user-data get_meta_data || echo "Error trying to extract cloud meta-data" >&2
-    fi
-
-    . /var/lib/cloud/meta-data
-}
-
 # archive orig /var, mount new var on top and restore orig var
 copy_and_mount() {
     local dev=$1
@@ -28,7 +19,7 @@ setup_var() {
         [ -b "$dev" ] || continue
 
         # already mounted
-        mount | grep -q "$dev" && continue
+        mount | grep -q "$d" && continue
 
         case "$CLOUD" in
             aws)
@@ -38,16 +29,17 @@ setup_var() {
                 type=$(file -Lbs $d)
                 if [[ "$type" =~ "XFS filesystem" ]]; then
                     xfs_repair $d >/dev/null 2>&1
+                    mount -t xfs -o noatime "$d" /var
                 else
                     mkfs.xfs -qf $d >/dev/null
+                    copy_and_mount "$d"
                 fi
-                copy_and_mount "$dev"
-                grep -q "$dev" /etc/fstab || echo "$dev /var xfs defaults,noatime,nofail 0 2" >> /etc/fstab
+                grep -q "$d" /etc/fstab || echo "$d /var xfs defaults,noatime,nofail 0 2" >> /etc/fstab
                 fi
             ;;
         *)
-            echo "Unsupported Cloud '$CLOUD'" >&2
-            exit 1
+            ewarn "Unsupported cloud: $CLOUD"
+            return 1
             ;;
         esac
     done
@@ -122,6 +114,8 @@ mount_volumes() {
         # mount
         mkdir -p $volPath
         mount -t xfs -o noatime $volDevice $volPath
+
+        ebegin "mounting $volDevice at $volPath"
     done
 }
diff --git a/kubezero/zdt-base/cb-mount-var.init b/kubezero/zdt-base/cloudbender-early.init
similarity index 64%
rename from kubezero/zdt-base/cb-mount-var.init
rename to kubezero/zdt-base/cloudbender-early.init
index 0f8338d..49f9bf5 100755
--- a/kubezero/zdt-base/cb-mount-var.init
+++ b/kubezero/zdt-base/cloudbender-early.init
@@ -1,7 +1,7 @@
 #!/sbin/openrc-run
 # vim:set ts=8 noet ft=sh:
 
-description="Find suitable block device, prepare and mount it under /var"
+description="CloudBender early - eg. mount suitable block device at /var"
 
 depend() {
     need fsck root
@@ -11,7 +11,7 @@ depend() {
 }
 
 start() {
-    source /usr/lib/cloudbender/base.sh
+    source /usr/lib/cloudbender/volumes.sh
 
     ebegin "Looking for suitable /var"
     setup_var
diff --git a/kubezero/zdt-base/cloudbender.init b/kubezero/zdt-base/cloudbender.init
new file mode 100755
index 0000000..e17a304
--- /dev/null
+++ b/kubezero/zdt-base/cloudbender.init
@@ -0,0 +1,48 @@
+#!/sbin/openrc-run
+# vim:set ts=8 noet ft=sh:
+#
+
+description="CloudBender - mount additional volumes, send shutdown messages"
+
+depend() {
+    need net
+    before sshd
+    after tiny-cloud
+}
+
+start() {
+    source /usr/lib/cloudbender/init.sh
+    source /usr/lib/cloudbender/base.sh
+    source /usr/lib/cloudbender/volumes.sh
+
+    get_meta_data
+
+    # mount extra volumes as early as possible
+    [ -n "$VOLUMES" ] && mount_volumes "$VOLUMES"
+
+    # allow optional ssh keys, eg. via IAM for AWS
+    cb_sshkeys
+
+    eend $?
+}
+
+
+stop() {
+    source /usr/lib/cloudbender/init.sh
+    source /usr/lib/cloudbender/base.sh
+    source /usr/lib/cloudbender/volumes.sh
+
+    get_meta_data
+
+    [ -n "$VOLUMES" ] && unmount_volumes "$VOLUMES"
+
+    # Include dynamic config setting create at boot
+    [ -r /etc/cloudbender/rc.conf ] && . /etc/cloudbender/rc.conf
+
+    [ -n "$DEBUG" ] && [ -r /tmp/shutdown.log ] && SHUTDOWNLOG="$(cat /tmp/shutdown.log)"
+
+    [ -n "$RC_REBOOT" ] && ACTION="rebooting" || ACTION="terminated"
+    [ -z "$DISABLE_SCALING_EVENTS" ] && /var/lib/cloudbender/sns_alarm.sh "Instance $ACTION" "" Info "$SHUTDOWNLOG"
+
+    eend $?
+}
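+
+# Enabled via eg. 'rc-update add cloudbender default' (command illustrative); 'after tiny-cloud'
+# above ensures user-data/meta-data exists before volumes and SSH keys are configured.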
diff --git a/kubezero/zdt-base/cloudbender.stop b/kubezero/zdt-base/cloudbender.stop
deleted file mode 100755
index 01564c2..0000000
--- a/kubezero/zdt-base/cloudbender.stop
+++ /dev/null
@@ -1,15 +0,0 @@
-# Include dynamic config setting create at boot
-[ -r /etc/cloudbender/rc.conf ] && . /etc/cloudbender/rc.conf
-
-rm -f /tmp/shutdown.log
-
-for cmd in $(ls /etc/cloudbender/shutdown.d/* | sort); do
-    . $cmd 1>>/tmp/shutdown.log 2>&1
-done
-
-[ $DEBUG -eq 1 ] && SHUTDOWNLOG="$(cat /tmp/shutdown.log)"
-
-[ -n "$RC_REBOOT" ] && ACTION="rebooting" || ACTION="terminated"
-[ -z "$DISABLE_SCALING_EVENTS" ] && /var/lib/cloudbender/sns_alarm.sh "Instance $ACTION" "" Info "$SHUTDOWNLOG"
-
-sleep ${SHUTDOWN_PAUSE:-0}
diff --git a/kubezero/zdt-base/profile b/kubezero/zdt-base/profile
new file mode 100644
index 0000000..5f256e8
--- /dev/null
+++ b/kubezero/zdt-base/profile
@@ -0,0 +1,4 @@
+alias sudo='doas'
+alias cu='doas cat /var/log/user-data.log'
+alias cl="doas cat /var/log/messages | jq -r '\"\(.time): \(.message)\"'"
+alias tl="doas tail -f /var/log/messages | jq -r '\"\(.time): \(.message)\"'"
diff --git a/kubezero/zdt-base/syslog-ng.apparmor b/kubezero/zdt-base/syslog-ng.apparmor
new file mode 100644
index 0000000..7f32c17
--- /dev/null
+++ b/kubezero/zdt-base/syslog-ng.apparmor
@@ -0,0 +1,4 @@
+/var/lib/syslog-ng/syslog-ng.ctl rw,
+@{PROC}/@{pid}/cmdline r,
+@{PROC}/@{pid}/loginuid r,
+@{PROC}/@{pid}/sessionid r,
diff --git a/kubezero/zdt-base/uniq_hostname.py b/kubezero/zdt-base/uniq_hostname.py
new file mode 100755
index 0000000..587d2c0
--- /dev/null
+++ b/kubezero/zdt-base/uniq_hostname.py
@@ -0,0 +1,102 @@
+#!/usr/bin/python3
+
+# use pyminify: pyminifier --obfuscate-variables $0 > minified_$0
+
+import sys
+import boto3
+import time
+
+my_tag = 'CustomHostname'
+
+ec2_client = boto3.client('ec2')
+
+
+def get_tag(resource, tag_name):
+    try:
+        for tag in resource['Tags']:
+            if tag['Key'] == tag_name:
+                return tag['Value']
+
+    except KeyError:
+        pass
+
+    return None
+
+
+def enumerate_instances(launchtemplate_id, hostname_format):
+    """ Search for all names in use as well as all instances in flight """
+    in_use = []
+    in_flight = []
+
+    try:
+        # look for other instances sharing the same launch template which works across multiple ASGs eg. kube-control
+        for r in ec2_client.describe_instances(Filters=[{'Name': 'instance-state-name', 'Values': ['pending', 'running', 'stopping', 'stopped']}, {'Name': 'tag:aws:ec2launchtemplate:id', 'Values': [launchtemplate_id]}])['Reservations']:
+            for instance in r["Instances"]:
+                # If an instance already has the tag mark index as used
+                hostname = get_tag(instance, my_tag)
+                if hostname:
+                    in_use.append(hostname)
+                else:
+                    in_flight.append(instance['InstanceId'])
+
+    except (KeyError, IndexError):
+        pass
+
+    in_flight = sorted(in_flight)
+
+    return in_use, in_flight
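+
+# Illustrative: with hostname_format 'etcd-{0}' and etcd-0/etcd-2 already tagged as in use,
+# the lowest-free-index search below yields etcd-1.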
+
+
+# Test format string
+try:
+    launchtemplate_id = sys.argv[1]
+    my_instance_id = sys.argv[2]
+    hostname_format = sys.argv[3]
+
+    hostname_format.format(1)
+    print("Using {0} as Hostname format string".format(
+        hostname_format), file=sys.stderr)
+except (IndexError):
+    print("Invalid or missing format string, aborting", file=sys.stderr)
+    sys.exit(0)
+
+timeout = 0
+while True:
+    in_use, in_flight = enumerate_instances(launchtemplate_id, hostname_format)
+
+    # Ideally in_flight has only one element, my_instance_id
+    # otherwise we have a race condition
+    # if my_instance_id is the first element in the sorted list we proceed to break the race
+    # otherwise wait for 2 seconds and try again
+    if my_instance_id not in in_flight:
+        print("Seems like instance {0} is already in_use, aborting".format(
+            my_instance_id), file=sys.stderr)
+        break
+
+    if in_flight[0] == my_instance_id:
+        # Now lets find the lowest free index
+        index = 0
+        my_hostname = hostname_format.format(index)
+        for name in sorted(in_use):
+            if name != my_hostname:
+                break
+            index += 1
+            my_hostname = hostname_format.format(index)
+
+        print("Assigning {0} to {1}".format(
+            my_hostname, my_instance_id), file=sys.stderr)
+
+        ec2_client.create_tags(Resources=[str(my_instance_id)], Tags=[
+            {'Key': str(my_tag), 'Value': str(my_hostname)}])
+        print(my_hostname)
+        break
+
+    print("{0} are in flight and we are not the first, {1} are currently in use, retrying...".format(
+        in_flight, in_use), file=sys.stderr)
+    timeout += 2
+
+    if timeout > 300:
+        print("Parallel booting instances did not settle within time limit (300s). Giving up!", file=sys.stderr)
+        break
+
+    time.sleep(2)
diff --git a/kubezero/zdt-base/zdt-base.post-install b/kubezero/zdt-base/zdt-base.post-install
index 599a9ed..ca4178a 100644
--- a/kubezero/zdt-base/zdt-base.post-install
+++ b/kubezero/zdt-base/zdt-base.post-install
@@ -13,9 +13,10 @@
 echo 'enabled cgroupv2, openRC logging'
 
 #sed -i -e 's/^[\s#]*rc_parallel=.*/rc_parallel="YES"/' /etc/rc.conf
 #echo 'enable parallel openRC'
 
-# Setup syslog-ng json logging
+# Setup syslog-ng json logging and apparmor tweaks
 cp /lib/zdt/syslog-ng.conf /etc/syslog-ng/syslog-ng.conf
 cp /lib/zdt/syslog-ng.logrotate.conf /etc/logrotate.d/syslog-ng
+cp /lib/zdt/syslog-ng.apparmor /etc/apparmor.d/local/sbin.syslog-ng
 
 mv /etc/periodic/daily/logrotate /etc/periodic/hourly/
 echo 'syslog-ng: all to /var/log/messages as json, rotate hourly'