Kubezero 1.26.7-rc1

This commit is contained in:
Stefan Reimer 2023-08-18 15:17:00 +00:00
parent 0402adbf2e
commit 961d6621da
27 changed files with 871 additions and 534 deletions

View File

@ -47,13 +47,13 @@ apk: packages distfiles
download:
aws s3 sync s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/x86_64/ packages/kubezero/x86_64/ --exclude APKINDEX.tar.gz
aws s3 sync s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ packages/kubezero/aarch64/ --exclude APKINDEX.tar.gz
#aws s3 sync s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ packages/kubezero/aarch64/ --exclude APKINDEX.tar.gz
invalidate_cdn:
aws cloudfront create-invalidation --distribution $(CF_DIST) --paths "/alpine/*"
upload: invalidate_cdn
aws s3 sync --delete packages/kubezero/x86_64/ s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/x86_64/ --exclude APKINDEX.tar.gz
aws s3 sync --delete packages/kubezero/aarch64/ s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ --exclude APKINDEX.tar.gz
#aws s3 sync --delete packages/kubezero/aarch64/ s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ --exclude APKINDEX.tar.gz
aws s3 cp packages/kubezero/x86_64/APKINDEX.tar.gz s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/x86_64/ --cache-control max-age=1
aws s3 cp packages/kubezero/aarch64/APKINDEX.tar.gz s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ --cache-control max-age=1
#aws s3 cp packages/kubezero/aarch64/APKINDEX.tar.gz s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ --cache-control max-age=1

View File

@ -1,7 +1,7 @@
# Contributor: Stefan Reimer <stefan@zero-downtime.net>
# Maintainer: Stefan Reimer <stefan@zero-downtime.net>
pkgname=aws-iam-authenticator
pkgver=0.6.2
pkgver=0.6.10
pkgrel=0
pkgdesc="AWS aws-iam-authenticator"
url="https://github.com/kubernetes-sigs/aws-iam-authenticator"
@ -20,5 +20,5 @@ package() {
}
sha512sums="
4789fe7c11d4d1b94da5f35844a0da8e62da743bef3fc13f668c542f3dbc83584ef29abbcebc6f4651aad8ecbd9195d6bfc13476c7dd4a1d34ed11822652fc5e aws-iam-authenticator-0.6.2.tar.gz
2b5da6dfbec1f5483ead8da280de8dd719b71157a9bfa4751c015dbc77a4f4c01a59486015cd2231ffb4232a0bf4a35ef843007605dd0b9fffd51ca0208f8fda aws-iam-authenticator-0.6.10.tar.gz
"

View File

@ -3,7 +3,7 @@
# Contributor: TBK <alpine@jjtc.eu>
# Maintainer: ungleich <foss@ungleich.ch>
pkgname=cri-o
pkgver=1.26.3
pkgver=1.26.4
pkgrel=0
pkgdesc="OCI-based implementation of Kubernetes Container Runtime Interface"
url="https://github.com/cri-o/cri-o/"
@ -52,6 +52,7 @@ source="
makefile-fix-install.patch
fix-test.patch
remove-systemd-files.patch
crictl.yaml
"
# secfixes:
@ -89,15 +90,18 @@ package() {
install -Dm755 "$srcdir"/crio.initd "$pkgdir"/etc/init.d/crio
install -Dm644 "$srcdir"/crio.conf "$pkgdir"/etc/crio/crio.conf
install -Dm644 "$srcdir"/crio.logrotated "$pkgdir"/etc/logrotate.d/crio
install -Dm644 "$srcdir"/crictl.yaml "$pkgdir"/etc/crictl.yaml
}
sha512sums="
58718db358d35b61e5edb8a16185bc534337a1ebfaf0d40ab17efb73c266fb2c337fad3cf92a7d8bcc7a02c4e2180b2b79a5896eb635b43334bcc1366b12baf8 cri-o-1.26.3.tar.gz
99bf6b438da236491fcc33ddaa28aeb381fc40c04138918be98fca1117132c5616598e4d758a6852071a67e4884895494b091c9206490a964a559723f77b84e7 cri-o-1.26.4.tar.gz
1f60719677295c9c5c615eb25d9159bde0af68a132eee67747f57fe76642d457c98c896c6189f85637d7b4ac24ba55fd9eaeb1699f43c3c5077b645f72a479fb crio.conf
26048a219bc426ef4a4f50e96d6e0ded1c55dc8415df9a2773764d2ebcb3d9e91077b2916da1ff32674ca4a53062e41e185503d671dacc3167a018b0066347e1 crio.initd
cfc4c144931400023e6642fa0b9880f0e3c09c187542905ca56044cedafb5e1f1d49708e4352233abee4e02181155c02fc9688bf93202fc8d80dfc1ffc90699b crio.initd
1115228546a696eeebeb6d4b3e5c3152af0c99a2559097fc5829d8b416d979c457b4b1789e0120054babf57f585d3f63cbe49949d40417ae7aab613184bf4516 crio.logrotated
0a567dfa431ab1e53f2a351689be8d588a60cc5fcdbda403ec4f8b6ab9b1c18ad425f6c47f9a5ab1491e3a61a269dc4efa6a59e91e7521fa2b6bb165074aa8e0 cni-plugins-path.patch
f9577aa7b1c90c6809010e9e406e65092251b6e82f6a0adbc3633290aa35f2a21895e1a8b6ba4b6375dcad3e02629b49a34ab16387e1c36eeb32c8f4dac74706 makefile-fix-install.patch
1c1bfa5feeb0c5ddc92271a5ef80edc38d56afa1574ffc124605d5bb227a407b55dd5268df6cebc6720768ac31245e08b7950e5ab2b7f14ba934c94f1e325f86 fix-test.patch
78c150f87027de489289596371dce0465159ced0758776b445deb58990e099de9c654406183c9da3cc909878b24d28db62121b7056cd180a6f2820e79e165cc6 remove-systemd-files.patch
79e1a7c6183ba56f55d923e9d738be945564494042bc011d31e9195f66c268d702ee5c86711d4b46618285fc1b10b59ea55c321390feca770cfc7de334e103bd crictl.yaml
"

View File

@ -0,0 +1,5 @@
# runtime entpoint, see https://github.com/kubernetes/enhancements/issues/2371 might be fixed with 1.29 ?
runtime-endpoint: unix:///run/crio/crio.sock
image-endpoint: unix:///var/run/crio/crio.sock
timeout: 60
debug: false

View File

@ -8,7 +8,9 @@ extra_started_commands="reload"
command="/usr/bin/${RC_SVCNAME}"
command_args="${command_args}"
command_background="true"
start_stop_daemon_args=" \
# We run all containers with nice level 1
start_stop_daemon_args="-N 1 \
--stdout /var/log/${RC_SVCNAME}/${RC_SVCNAME}.log \
--stderr /var/log/${RC_SVCNAME}/${RC_SVCNAME}.log"

View File

@ -5,7 +5,7 @@
# Contributor: Dave <dj.2dixx@gmail.com>
# Maintainer: Stefan Reimer <stefan@zero-downtime.net>
pkgname=kubernetes
pkgver=1.26.6
pkgver=1.26.7
pkgrel=0
pkgdesc="Container Cluster Manager"
url="https://kubernetes.io/"
@ -208,7 +208,7 @@ _do_zshcomp() {
}
sha512sums="
02a6ce3c0ec44dce5b03b97d4a544ac96d43a2342f172c176cbe83b269ffec4c3704f01f620eb11cfdc61728f8431ab6ffdbecc21e0cb29cb388b62c1a930cdf kubernetes-1.26.6.tar.gz
9069e653e87883e54df8e01edf2cce9d847a83d593f13e8281654653924586e73841d1ee302de4de93dadf2a2474e875cf350f03c2aec512c100cb3d4fb7d9c5 kubernetes-1.26.7.tar.gz
5427c2e653504cfd5b0bcaf195d4734ee40947ddfebc9f155cd96dddccfc27692c29d94af4ac99f1018925b52995c593b584c5d7a82df2f185ebce1a9e463c40 make-e2e_node-run-over-distro-bins.patch
94d07edfe7ca52b12e85dd9e29f4c9edcd144abc8d120fb71e2a0507f064afd4bac5dde30da7673a35bdd842b79a4770a03a1f3946bfae361c01dd4dc4903c64 make-test-cmd-run-over-hyperkube-based-kubectl.patch
e690daff2adb1013c92124f32e71f8ed9a18c611ae6ae5fcb5ce9674768dbf9d911a05d7e4028488cda886e63b82e8ac0606d14389a05844c1b5538a33dd09d1 kube-apiserver.initd
@ -223,7 +223,7 @@ d7e022ee22da191bda7382f87cb293d9c9d115a3df0c2054bf918279eb866f99c6d5c21e4c98eae8
561bef5633ba4b9021720624443d9c279a561e5fabea76e5d0fbee2e7ad8999029a2511a45895fbec8448026212a3c5b4c197b248a6afa7f8bd945f705524ea7 kube-scheduler.initd
af88b382ab75657d0ff13c3f8f6d924cef9f2df7807a9a27daa63495981801bc4b607998f65c0758c11a7e070e43c24f7184ba7720711109c74b1c4d57919e34 kube-scheduler.confd
3692da349dd6ed0f5acc09d7b95ac562ffecb103e2270bebdfe4a7808d48dada9d2debff262d85b11c47f9ca3f0c20000712d03629ed813ff08a3e02d69267e6 kube-scheduler.logrotated
1b0e0cc45666b18ecb333bf3835b282e9f72e40bf29b7a9d7b9e5e2bbbd009297be8c31d284e865af45d66434a27dee14e617e61fac0dda0242d7d0f3fc89ce8 kubelet.initd
f79ea6dec632ca052f8cd061bf0c5f7f821c7013c048b4737752e1a41c677c020c1f9454ddabe7f9ba5d55c2a8a4718170e30049b7212a4a2dc91148a3ac7ebc kubelet.confd
73fdb0303e72c006f4570af28312ecee224beb1d6cc1e19003593af377436b4082f6d49bd25cd9cae258ffa01bc9f2f0624d11ef0ecc64c658761888923be812 kubelet.initd
887ee5b4c67198727407e74c92639b23674515d5f049938f8ce5f3ba2eabcf7f321c00c914b254a7b2baa5c2f45a9ae4a945c9c90f1968f1012dbd60245d1b81 kubelet.confd
941f4a7579dcf78da2d323ac69195e95eba6600e6fcefe9231447f11c9867a7aa57b4189ee1fefb10eab19c89665ea2e7696b539c92e99fbcde905d2ff85be58 kubelet.logrotated
"

View File

@ -1 +1,2 @@
command_args="--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --cgroup-driver=cgroupfs --config=/var/lib/kubelet/config.yaml"
command_args="--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --config=/var/lib/kubelet/config.yaml --image-credential-provider-bin-dir=/usr/libexec/kubernetes/kubelet-plugins --image-credential-provider-config=/etc/kubernetes/credential-provider.yaml"
rc_after="cloudbender"

View File

@ -5,6 +5,10 @@
supervisor=supervise-daemon
description="Kubelet, a Kubernetes node agent"
# do not start without kubezero node config in place
required_files="/var/lib/kubelet/kubeadm-flags.env /var/lib/kubelet/config.yaml"
respawn_max=0
if [ -e /var/lib/kubelet/kubeadm-flags.env ]; then
. /var/lib/kubelet/kubeadm-flags.env;
fi

View File

@ -15,7 +15,7 @@ depends="
kubelet~$pkgver
kubectl~$pkgver
ecr-credential-provider~$pkgver
aws-iam-authenticator~0.6.2
aws-iam-authenticator~0.6.10
"
options="!check"
#install="$pkgname.post-install"
@ -27,10 +27,13 @@ subpackages="
source="
shared-sys-fs.start
evictLocalNode.sh
credential-provider.yaml
kubelet.monit
crio.conf
"
IMAGES="
quay.io/cilium/cilium:v1.13.4
quay.io/cilium/cilium:v1.13.5
ghcr.io/k8snetworkplumbingwg/multus-cni:v3.9.3
"
@ -43,11 +46,16 @@ build() {
}
package() {
# make /sys shared
install -Dm755 "$srcdir"/shared-sys-fs.start "$pkgdir/etc/local.d/shared-sys-fs.start"
# drain local node
install -Dm755 "$srcdir"/evictLocalNode.sh "$pkgdir/usr/bin/evictLocalNode.sh"
mkdir -p $pkgdir/etc/kubernetes/manifests
install -Dm644 "$srcdir"/credential-provider.yaml "$pkgdir/etc/kubernetes/credential-provider.yaml"
install -Dm644 "$srcdir"/kubelet.monit "$pkgdir/etc/monit.d/kubelet.conf"
# crio settings
install -Dm644 "$srcdir"/crio.conf "$pkgdir/etc/crio.conf.d/01-kubezero.conf"
}
# Preload container images all nodes need to speed up boot time and reduce data transfer
@ -63,4 +71,7 @@ imagecache() {
sha512sums="
ecb33fc3a0ffc378723624858002f9f5e180e851b55b98ab6611ecc6a73d4719bc7de240f87683fc58de8bf577059e6f19b417655b5301ef8c32deff67a29dff shared-sys-fs.start
fce1013f7b1bfa8ee526de62e642a37fda3168889723e873d3fb69e257f4caa1423b5a14b9343b12a87f3b6f93c7d3861b854efda67ef2d6a42a5ca8cf3d1593 evictLocalNode.sh
716ec3404d7016bce57d663f750a18db3ede07c1ba7a2908f9f01f41c5ca8fe4e7232ded27bc2bccd705b11ae5cd26574322a8eacefcf8c102bba0f8e4995e59 credential-provider.yaml
abf062fbb2b94831f5321265a648bd17ddbb198e446e763d64d0cc8e3b7320e1545376cfa57b1491bb296ace28f1623439807a4157a2f32984082e565e2edcba kubelet.monit
064fc245b7ffd67834a2f5fd13cb0bcb5f4a5caf79b8113b3669bf1d0e1a4af2042e69f8f496991de76d621fd01bc7e67de37c59f034584d12622c6af96376ff crio.conf
"

View File

@ -0,0 +1,12 @@
apiVersion: kubelet.config.k8s.io/v1alpha1
kind: CredentialProviderConfig
providers:
- name: ecr-credential-provider
matchImages:
- "*.dkr.ecr.*.amazonaws.com"
- "*.dkr.ecr.*.amazonaws.cn"
- "*.dkr.ecr-fips.*.amazonaws.com"
- "*.dkr.ecr.us-iso-east-1.c2s.ic.gov"
- "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov"
defaultCacheDuration: "12h"
apiVersion: credentialprovider.kubelet.k8s.io/v1alpha1

View File

@ -0,0 +1,8 @@
[crio.metrics]
enable_metrics = true
[crio.runtime]
default_ulimits = [ "nofile=65535:65535", "memlock=-1:-1" ]
[crio.network]
cni_default_network="multus-cni-network"

View File

@ -0,0 +1,9 @@
check process kubelet pidfile /run/kubelet.pid
start program = "/etc/init.d/kubelet start"
stop program = "/etc/init.d/kubelet stop"
if failed
port 10248
protocol http
request "/healthz"
for 2 cycles
then restart

View File

@ -1,7 +1,7 @@
# Contributor: Stefan Reimer <stefan@zero-downtime.net>
# Maintainer: Stefan Reimer <stefan@zero-downtime.net>
pkgname=nvidia-open-gpu
#pkgver=535.54.03
#pkgver=535.86.05
pkgver=525.125.06
pkgrel=0
pkgdesc="NVIDIA Linux open GPU kernel modules"

View File

@ -13,10 +13,8 @@ subpackages="$pkgname-openrc $pkgname-aws"
install="$pkgname.post-install"
source="
cb_base.sh
cb_lock.sh
cb_volumes.sh
cb_init.sh
common.sh
boot.sh
cloudbender-early.init
cloudbender.init
zdt-sysctl.conf
@ -40,13 +38,11 @@ build() {
package() {
# core libraries
install -Dm755 "$srcdir/cb_base.sh" "$pkgdir/usr/lib/cloudbender/base.sh"
install -Dm755 "$srcdir/cb_lock.sh" "$pkgdir/usr/lib/cloudbender/lock.sh"
install -Dm755 "$srcdir/cb_volumes.sh" "$pkgdir/usr/lib/cloudbender/volumes.sh"
install -Dm755 "$srcdir/cb_init.sh" "$pkgdir/usr/lib/cloudbender/init.sh"
install -Dm755 "$srcdir/boot.sh" "$pkgdir/usr/lib/cloudbender/boot.sh"
install -Dm755 "$srcdir/common.sh" "$pkgdir/usr/lib/cloudbender/common.sh"
# convienience
mkdir -p "$pkgdir"/etc/cloudbender "$pkgdir/home/alpine"
mkdir -p "$pkgdir/home/alpine"
install -Dm644 "$srcdir/profile" "$pkgdir/home/alpine/.profile"
# various sysctl tunings
@ -89,23 +85,21 @@ aws() {
}
sha512sums="
13944b955f3c2db54674cd84d58950fc9d8ca522d96081f05ed9bfef8289e30c3dd508796c94d9b283b9c168aadc70f9a934442ec48a5c0c9f25578e62ea0b78 cb_base.sh
3e02b858680d751b2c1fb22feeec2b767e328fdf007d9fb6687f309e62630467e982cc33d47c4417311938d35bb17dc210f9e0a40b90298dc22cf142f022c124 cb_lock.sh
d7bb357e25aa797b68185036814f1b34d4d6b098de43ef0cf3b71011ebee3d6c2e4bd8fa73a5c7dd6f37c6bbbf3c9c344461a9507346104d4fe783dd1f8b2e23 cb_volumes.sh
4ccae4451de8fa83d1a73182ad539ca218a98f68f0bbfe2fc4d99ade75e802e3baa65b4a0e00ae2a0b3c870e6467b219d1c5a22e04f3930c3efd7de6c3cf38ab cb_init.sh
9c688e08e44ae965eaa6021e005f65077690c30fe6b2de7371c57ae3a53a18f12a356587261c950b8c900f73cb35af3ba7795d33181532b485aeee3b6ca41757 cloudbender-early.init
7fea4ed0ebcf781ae197962c9f6d287c66e7380c758126a1b46c711baf3c7e93d6ccf1d536dada590ca02a7f7b271fd5ecb0dcb4854772cc8b1b70d379249f65 cloudbender.init
a99d8fada2ce90876abbd84d8f72c976d1363e0b1437952aee8b22983b7bc7492803950bcc4dfb9866fcf744b9b6056bdbd53c257780d26814f16c8b0983242f common.sh
7f6a69a77d6a4a3c34928609108b7939cd43a892d72fb14bebc1d935cd66eda3bd625d15eebb4d6026715b36b12919fcaf863ed5f65ffdc0e2de9fc1b969cb3e boot.sh
ee19dcc0b46bdff8581c2661cda69fd8a3fa2de4dd30d96a4ce438b2536043a9f0bc57a6b0d4056e2715a2663a89bc1b07ec33798d5430a2046a65069a327cda cloudbender-early.init
9ca46acc407ff6aa18beec02564c3822db215bd5dc0a94f9bd9258c9b99f85cc40f793e20618509ed7f1e8645407cffb8274d7838b46442ad44e64726e37e3a0 cloudbender.init
b9479835d8667fa99f8b1b140f969f0464a9bb3c60c7d19b57e306cfe82357d453932791e446caded71fddd379161ae8328367f1ee75ae3afc1b85e12294b621 zdt-sysctl.conf
76e6a4f309f31bfa07de2d3b1faebe5670722752e18157b69d6e868cbe9e85eda393aed0728b0347a01a810eee442844c78259f86ff71e3136a013f4cbfaaea4 ps_mem.py
5376f4bf8356ce9249c45e78085073245181e8742c7b4be47c71dcd97a611ae125a7dfd3060502bdd591560af070334f89fe60dbc09c008926149c538ab0560a syslog-ng.conf
484bdcf001b71ce5feed26935db437c613c059790b99f3f5a3e788b129f3e22ba096843585309993446a88c0ab5d60fd0fa530ef3cfb6de1fd34ffc828172329 syslog-ng.logrotate.conf
e86eed7dd2f4507b04050b869927b471e8de26bc7d97e7064850478323380a0580a92de302509901ea531d6e3fa79afcbf24997ef13cd0496bb3ee719ad674ee syslog-ng.apparmor
b928ba547af080a07dc9063d44cb0f258d0e88e7c5a977e8f1cf1263c23608f0a138b8ffca0cdf5818ee72fccb3ce8433f877811be3107bb9c275dfff988179c monitrc
64944727d658ff37e7ff9d22a23869e225e104d9229756ba4fef1fc481c56f782472a1c74f8127636b4a98d4545ae30e7d35822a1f0a0fa31a59ec8eaf8c0396 monit_alert.sh.aws
e00a8f296c76446fe1241bf804c0108f47a2676f377a413ee9fede0943362a6582cad30fe13edd93f3d0daab0e2d7696553fb9458dca62adc05572dce339021a monitrc
c955dabe692c0a4a2fa2b09ab9096f6b14e83064b34ae8d22697096daf6551f00b590d837787d66ea1d0030a7cc30bef583cc4c936c980465663e73aec5fa2dc monit_alert.sh.aws
346b0170ada6cc1207ffb7b8ef138a1570a63c7df4d57618aa4b6b6c0d2df2197b0f5b23578ec83c641ee5e724865ac06985222e125809c990467426a0851b72 neofetch.conf
532b8e2eb04942ab20bdc36b5dea1c60239fcbfcb85706123f3e05c18d65c938b85e9072d964ae5793177625a8db47b532db1f5bd5ed5ecbb70d5a331666ff54 zdt-ascii.txt
c3e72cd92936b03f2b9eab5e97e9a12fcddcdf2c943342e42e7702e2d2407e00859c62dc9b4de3378688d2f05458aa5c104272af7ab13e53a62f1676d1a9a1b4 profile
816049360aa442f9e9aa4d6525795913cfe3dc7c6c14dc4ccad59c0880500f9d42f198edc442fe036bc84ba2690d9c5bc8ae622341d8276b3f14947db6b879b1 route53.py
7da28446762a36a6737c5b30becbce78775bd943b4d0c5ef938a50f49b4f51f66708434aa79004c19d16c56c83f54c8d6d68e1502ebc250c73f8aae12bed83c0 get_iam_sshkeys.py
8fd5dca9b9fdae61022f136215afa8adc0d199afcf26593bdd0bd1946d0f2efc5d7ed345704ef9642fbeedeeea007ed31b67fafe89289b749a560a045da45b8e uniq_hostname.py
ae1941fc45e61fa8d211f5ef7eff2dd01510a6d364c4302cab267812321a10e7434ecc8d8c9263d8671ce5604d04d6531601bf42886a55fb6aec7f321651e1dc uniq_hostname.py
"

52
kubezero/zdt-base/boot.sh Normal file
View File

@ -0,0 +1,52 @@
#!/bin/sh
# We have no metadata nor instance parameters yet!
# We built on top of tiny-cloud
. /lib/tiny-cloud/common
# archive orig /var, mount new var on top and restore orig var
copy_and_mount() {
local dev=$1
tar cf /tmp/var.tar /var 2>/dev/null
mount -t xfs -o noatime "$dev" /var
tar xf /tmp/var.tar -C / && rm -f /tmp/var.tar
}
setup_var() {
local _devices="$(find /dev/xvd[a-z] /dev/sd[a-z] -maxdepth 0 2>/dev/null || true)"
for d in $_devices; do
# resolve to a valid block device
dev="$(realpath "$d")"
[ -b "$dev" ] || continue
# already mounted
mount | grep -q "$d" && continue
case "$CLOUD" in
aws)
# on AWS look for sdx/xvdx
if [ "$d" = "/dev/sdx" -o "$d" = "/dev/xvdx" ]; then
# check volume for existing filesystem
type=$(file -Lbs $d)
if [[ "$type" =~ "XFS filesystem" ]]; then
xfs_repair $d >/dev/null 2>&1
mount -t xfs -o noatime "$d" /var
else
mkfs.xfs -qf $d >/dev/null
copy_and_mount "$d"
fi
add_once /etc/fstab "$d /var xfs defaults,noatime,nofail 0 2"
log -i -t early info "mounted $d at /var"
fi
;;
*)
ewarn "Unsupported cloud: $CLOUD"
return 1
;;
esac
done
}

View File

@ -1,150 +0,0 @@
#!/bin/sh
function log { logger -t "user-data.${_FUNC}" -- $@; }
function die { log "$@"; exit_trap 1 1 / "$@"; }
# msg used for sns event, last one wins
function msg { MSG="$@"; log "$@"; }
# Generic retry command wrapper, incl. timeout of 30s
# $1 = number of tries; 0 = forever
# $2 = number of seconds to sleep between tries
# $@ actual command
retry() {
local tries=$1
local waitfor=$2
shift 2
while true; do
# Only use timeout of $1 is an executable, call directly if function
type -tf $1 >/dev/null && { timeout 30 $@ && return; } || { $@ && return; }
((tries=tries-1))
[ $tries -eq 0 ] && return 1
sleep $waitfor
done
}
function add_swap() {
[ -f /.swapfile ] || { dd if=/dev/zero of=/.swapfile bs=1M count=$1 && chmod 600 /.swapfile && mkswap /.swapfile && swapon /.swapfile; }
grep -q "/.swapfile" /etc/fstab || echo "/.swapfile none swap sw 0 0" >> /etc/fstab
sysctl -w vm.swappiness=10
}
# Get SSM secure string base64 decoded
# $0 SSM_PATH, value to stdout
function get_secret() {
aws ssm get-parameter --name ${1,,} --with-decryption --query 'Parameter.Value' | base64 -d
}
# Store values as base64 on SSM
# $0 SSM_PATH VALUE
function put_secret() {
aws ssm put-parameter --name ${1,,} --type SecureString --value "$(echo "$2" | base64 -w0)" --overwrite
}
# Gets existing passphrase or creates new passphrase and stores it
function init_passphrase() {
local _URL=$1
local _PPFILE=$2
# If secret already exists noop
[ -f $_PPFILE ] && return 0
get_secret $_URL > $_PPFILE && chmod 600 $_PPFILE || \
{ xxd -l16 -p /dev/random > $_PPFILE; chmod 600 $_PPFILE; put_secret $_URL "$(cat $_PPFILE)"; }
}
function asg_heartbeat {
[ -n "$LAUNCH_HOOK" ] && aws autoscaling record-lifecycle-action-heartbeat --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name $AWS_AUTOSCALING_GROUPNAME || true
}
function setup_sns_alarms() {
# store SNS message json template
cat <<EOF > /etc/cloudbender/sns_alarm.json
{
"Source": "CloudBender",
"AWSAccountId": "$AWS_ACCOUNT_ID",
"Region": "$REGION",
"Artifact": "$ARTIFACT",
"Asg": "$AWS_AUTOSCALING_GROUPNAME",
"Instance": "$INSTANCE_ID",
"ip": "$IP_ADDRESS"
}
EOF
mkdir -p /var/lib/cloudbender
cat <<EOF > /var/lib/cloudbender/sns_alarm.sh
#!/bin/bash
SUBJECT=\$1
MSG=\$2
LEVEL=\${3:-Info}
ATTACHMENT=\${4:-""}
EMOJI=\${5:-""}
jq -M --arg subject "\$SUBJECT" --arg level "\$LEVEL" --arg msg "\$MSG" --arg attachment "\$ATTACHMENT" --arg emoji "\$EMOJI" --arg hostname "\$HOSTNAME" '.Subject = \$subject | .Level = \$level | .Message = \$msg | .Attachment = \$attachment | .Emoji = \$emoji | .Hostname = \$hostname' < /etc/cloudbender/sns_alarm.json | sed -e 's/\\\\\\\\/\\\\/g' > /tmp/sns.json
aws sns publish --region ${REGION} --target-arn $ALARMSNSARN --message file:///tmp/sns.json
EOF
chmod +x /var/lib/cloudbender/sns_alarm.sh
}
function exit_trap {
set +e
trap - ERR EXIT
local ERR_CODE=$1
local ERR_LINE="$2"
local ERR_FUNC="$3"
local ERR_CMD="$4"
if [ $ERR_CODE -ne 0 ]; then
CFN_STATUS="FAILURE"
RESULT="ABANDON"
else
CFN_STATUS="SUCCESS"
RESULT="CONTINUE"
fi
# Add SNS events on demand
if [ "x${ALARMSNSARN}" != 'x' ]; then
if [ $ERR_CODE -ne 0 ]; then
LEVEL="Error"
SUBJECT="Error during cloud-init."
if [ $ERR_LINE -ne 1 ]; then
MSG="$ERR_CMD failed in $ERR_FUNC at $ERR_LINE. Return: $ERR_CODE"
ATTACHMENT="$(pr -tn $0 | tail -n+$((ERR_LINE - 3)) | head -n7)"
else
MSG="$ERR_CMD"
fi
if [ -n "$DEBUG" ]; then
SUBJECT="$SUBJECT Instance kept running for debug."
else
SUBJECT="$SUBJECT Instance terminated by ASG lifecycle hook."
fi
else
LEVEL="Info"
SUBJECT="ZDT Alpine Instance launched."
fi
if [ -z "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] || [ "$LEVEL" != "Info" ]; then
/var/lib/cloudbender/sns_alarm.sh "$SUBJECT" "$MSG" "$LEVEL" "$ATTACHMENT"
fi
# Disable scaling events during shutdown
[ -n "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] && echo "DISABLE_SCALING_EVENTS=1" >> /etc/cloudbender/rc.conf
fi
[ -n "$LAUNCH_HOOK" ] && aws autoscaling complete-lifecycle-action --lifecycle-action-result $RESULT --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name ${AWS_AUTOSCALING_GROUPNAME} || true
if [ -n "${AWS_CLOUDFORMATION_LOGICAL_ID}" ]; then
aws cloudformation signal-resource --stack-name ${AWS_CLOUDFORMATION_STACK_NAME} --logical-resource-id ${AWS_CLOUDFORMATION_LOGICAL_ID} --unique-id ${INSTANCE_ID} --status ${CFN_STATUS}
fi
# timestamp being done
end_uptime=$(awk '{print $1}' < /proc/uptime)
log "Exiting user-data. Duration: $(echo "$end_uptime-$start_uptime" | bc) seconds"
# Shutdown / poweroff if we ran into error and not DEBUG
[ $ERR_CODE -ne 0 -a -z "$DEBUG" ] && poweroff
exit 0
}

View File

@ -1,108 +0,0 @@
# We built on top of tiny-cloud
. /etc/tiny-cloud.conf
IMDS_ENDPOINT="169.254.169.254"
. /lib/tiny-cloud/cloud/"$CLOUD"/imds
_imds() {
wget --quiet --timeout 1 --output-document - \
--header "$(_imds_header)" \
"http://$IMDS_ENDPOINT/$IMDS_URI/$1$IMDS_QUERY"
}
function query_imds() {
MAC=$(_imds meta-data/mac)
AVAILABILITY_ZONE=$(_imds meta-data/placement/availability-zone)
REGION=$(echo ${AVAILABILITY_ZONE} | sed "s/[a-z]$//")
INSTANCE_ID=$(_imds meta-data/instance-id)
cat <<EOF >> /var/lib/cloud/meta-data
AVAILABILITY_ZONE=$AVAILABILITY_ZONE
REGION=$REGION
INSTANCE_ID=$INSTANCE_ID
# Get the internal IP of first interface
IP_ADDRESS=$(_imds meta-data/local-ipv4)
PUBLIC_IP_ADDRESS=$(_imds meta-data/public-ipv4 || true)
MAC=$MAC
VPC_CIDR_RANGE=$(_imds meta-data/network/interfaces/macs/${MAC}/vpc-ipv4-cidr-block)
SUBNET=$(_imds meta-data/network/interfaces/macs/${MAC}/subnet-ipv4-cidr-block)
# Make sure we have basic domain / hostname set at all time
_META_HOSTNAME=$(_imds meta-data/hostname)
DOMAIN_NAME=\${_META_HOSTNAME#*.}
HOSTNAME=\${_META_HOSTNAME%%.*}
AWS_ACCOUNT_ID=$(_imds meta-data/network/interfaces/macs/${MAC}/owner-id)
INSTANCE_LIFE_CYCLE=$(_imds meta-data/instance-life-cycle)
INSTANCE_TYPE=$(_imds meta-data/instance-type)
EOF
}
function get_tags() {
# via metadata AWS restricts tags to NOT have " " or "/" ;-(
#for key in $(_imds meta-data/tags/instance); do
# TAGS[$key]=$(_imds meta-data/tags/instance/"$key")
#done
# Replace all /:.- with _ for valid variable names
while read _key value; do
key=$(echo ${_key//[\/:.-]/_} | tr '[:lower:]' '[:upper:]')
echo "$key=\"$value\"" >> /var/lib/cloud/meta-data
done < <(aws ec2 describe-tags --filters "Name=resource-id,Values=${INSTANCE_ID}" --query 'Tags[*].[Key,Value]')
}
# extract user-data args and cloud meta-data into /var/lib/cloud/meta-data
get_meta_data() {
if [ ! -f /var/lib/cloud/meta-data ]; then
ebegin "collecting metadata, instance tags and parameters"
echo '#!/bin/bash' > /var/lib/cloud/meta-data
query_imds
export AWS_DEFAULT_REGION=$REGION
export AWS_DEFAULT_OUTPUT=text
get_tags
[ -f /var/lib/cloud/user-data ] && bash /var/lib/cloud/user-data extract_parameters
fi
. /var/lib/cloud/meta-data
# Workaround for current CFN ASG_<parameter> hack
_key=$(echo $AWS_CLOUDFORMATION_LOGICAL_ID | tr '[:lower:]' '[:upper:]')
[ -n "$(eval echo \$${_key}_CUSTOMHOSTNAME)" ] && CUSTOMHOSTNAME="$(eval echo \$${_key}_CUSTOMHOSTNAME)"
[ -n "$(eval echo \$${_key}_VOLUMES)" ] && VOLUMES="$(eval echo \$${_key}_VOLUMES)"
return 0
}
################
# IAM SSH KEYS #
################
cb_sshkeys() {
case "$CLOUD" in
aws)
# on AWS call IAM for allowed groups and actual keys
GROUP=${SSHKEYIAMGROUP:-""}
ROLE=${SSHKEYIAMROLE:-"arn:aws:iam::000000000000:role/Undefined"}
[ $ROLE == "arn:aws:iam::000000000000:role/Undefined" ] && ROLE=""
if [ -n "$GROUP" ]; then
# Configure SSHD
sed -i -e "s,^[\s#]*AuthorizedKeysCommand\s.*,AuthorizedKeysCommand /usr/sbin/get_iam_sshkeys.py --user %u --group $GROUP --iamRole \"$ROLE\"," /etc/ssh/sshd_config
sed -i -e "s,^[\s#]*AuthorizedKeysCommandUser\s.*,AuthorizedKeysCommandUser nobody," /etc/ssh/sshd_config
ebegin "added $GROUP to SSH admin keys"
fi
;;
*)
ewarn "Unsupported Cloud: $CLOUD"
return 1
;;
esac
}

View File

@ -1,67 +0,0 @@
MUTEX=mutex
MUTEX_OWNER=$HOSTNAME
MUTEX_TIMEOUT=600
release_lock() {
local S3LOCK=$1
rm -f $MUTEX
aws s3 rm $S3LOCK
}
# Lock not timed out and we own it: 0
# Lock not timed out and someone else owns it: 1
# Lock timed out: 2
verify_lock() {
local S3LOCK=$1
aws s3 cp $S3LOCK $MUTEX
_host=$(grep "MUTEX_OWNER=" $MUTEX | sed -e 's/MUTEX_OWNER=//')
_time=$(grep "MUTEX_TIME=" $MUTEX | sed -e 's/MUTEX_TIME=//')
# Check for timestamp and timeout
let timepassed=$(date -u +%s)-$_time
[ $timepassed -gt $MUTEX_TIMEOUT ] && return 2
[ "$_host" == "$MUTEX_OWNER" ] && return 0
return 1
}
aquire_lock() {
local S3LOCK=$1
echo "MUTEX_OWNER=${MUTEX_OWNER}" > $MUTEX
echo "MUTEX_TIME=$(date -u +%s)" >> $MUTEX
aws s3 cp $MUTEX $S3LOCK
# verify we actually got the lock
sleep 2
verify_lock $S3LOCK
}
check_lock() {
local S3LOCK=$1
aws s3 ls $S3LOCK && rc=$? || rc=$?
# No LOCK ? -> get it !
if [ $rc -ne 0 ]; then
aquire_lock $S3LOCK
else
verify_lock $S3LOCK && rc=$? || rc=$?
# Lock timeout -> we get it
if [ $rc -eq 2 ]; then
aquire_lock $S3LOCK
# we already own it
elif [ $rc -eq 0 ]; then
return 0
# someone else has a valid lock
else
return 1
fi
fi
}

View File

@ -1,130 +0,0 @@
#!/bin/sh
# We built on top of tiny-cloud
. /etc/tiny-cloud.conf
# archive orig /var, mount new var on top and restore orig var
copy_and_mount() {
local dev=$1
tar cf /tmp/var.tar /var 2>/dev/null
mount -t xfs -o noatime "$dev" /var
tar xf /tmp/var.tar -C / && rm -f /tmp/var.tar
}
setup_var() {
for d in $(find /dev/xvd[a-z] /dev/sd[a-z]); do
# resolve to a valid block device
dev="$(realpath "$d")"
[ -b "$dev" ] || continue
# already mounted
mount | grep -q "$d" && continue
case "$CLOUD" in
aws)
# on AWS look for sdx/xvdx
if [ "$d" = "/dev/sdx" -o "$d" = "/dev/xvdx" ]; then
# check volume for existing filesystem
type=$(file -Lbs $d)
if [[ "$type" =~ "XFS filesystem" ]]; then
xfs_repair $d >/dev/null 2>&1
mount -t xfs -o noatime "$d" /var
else
mkfs.xfs -qf $d >/dev/null
copy_and_mount "$d"
fi
grep -q "$d" /etc/fstab || echo "$d /var xfs defaults,noatime,nofail 0 2" >> /etc/fstab
fi
;;
*)
ewarn "Unsupported cloud: $CLOUD"
return 1
;;
esac
done
}
attach_ebs() {
local volId="$1"
local device="$2"
local tries=30
while true; do
_json="$(aws ec2 describe-volumes --volume-ids $volId --region $REGION --output json)"
rc=$?; [ $rc -ne 0 ] && return $rc
vol_status=$(echo "$_json" | jq -r .Volumes[].State)
attachId=$(echo "$_json" | jq -r .Volumes[].Attachments[].InstanceId)
[ "$attachId" = "$INSTANCE_ID" ] && break
if [ "$vol_status" = "available" ]; then
aws ec2 attach-volume --volume-id "$volId" --instance-id "$INSTANCE_ID" --region "$REGION" --device "$device" > /dev/null
rc=$?; [ $rc -ne 0 ] && return $rc
break
fi
# if attached but not to us -> detach
if [ "$vol_status" = "in-use" ]; then
aws ec2 detach-volume --volume-id "$volId" --region "$REGION" --force
rc=$?; [ $rc -ne 0 ] && return $rc
fi
((tries=tries-1))
[ $tries -eq 0 ] && return 1
sleep 5
done
}
_parse_volume() {
# Todo: proper checks once all is yaml
# For now just replace ':'
echo $1 | sed -e 's/:/ /g'
}
# mount optional remote volumes
mount_volumes() {
local volumes="$1"
for vol in $volumes; do
# Todo: check volume type and call matching func
read volType volId volDevice volPath < <(_parse_volume $vol)
[ "$volType" != "ebs" ] && { echo "Unknown volume type $volType"; break; }
attach_ebs $volId $volDevice
rc=$?
[ $rc -ne 0 ] && { ewarn "error trying to attach $volId"; break; }
# wait for the block device to become available
while true; do
mdev -s
test -b $volDevice && break
sleep 1
done
# check volume for existing filesystem
type=$(file -Lbs $volDevice)
if [[ "$type" =~ "XFS filesystem" ]]; then
xfs_repair $volDevice >/dev/null 2>&1
else
mkfs.xfs -qf $volDevice >/dev/null
fi
# mount
mkdir -p $volPath
mount -t xfs -o noatime $volDevice $volPath
ebegin "mounting $volDevice at $volPath"
done
}
unmount_volumes() {
local volumes="$1"
for vol in $volumes; do
read volType volId volDevice volPath < <(_parse_volume $vol)
umount $volPath && aws ec2 detach-volume --volume-id "$volId" --instance-id $INSTANCE_ID --region $REGION > /dev/null
done
}

View File

@ -1,7 +1,7 @@
#!/sbin/openrc-run
# vim:set ts=8 noet ft=sh:
description="CloudBender early - eg. mount suitable block device at /var"
description="CloudBender early tasks (no network / metadata available yet)"
depend() {
need fsck root
@ -11,9 +11,9 @@ depend() {
}
start() {
source /usr/lib/cloudbender/volumes.sh
source /usr/lib/cloudbender/boot.sh
ebegin "Looking for suitable /var"
ebegin "CloudBender - early phase"
setup_var
eend $?
}

View File

@ -2,7 +2,7 @@
# vim:set ts=8 noet ft=sh:
#
description="CloudBender - mount additional volumes, send shutdown messages"
description="CloudBender - setup meta_data, mount additional volumes, send shutdown messages"
depend() {
need net
@ -11,38 +11,40 @@ depend() {
}
start() {
source /usr/lib/cloudbender/init.sh
source /usr/lib/cloudbender/base.sh
source /usr/lib/cloudbender/volumes.sh
source /usr/lib/cloudbender/common.sh
get_meta_data
import_meta_data
# mount extra volumes as early as possible
[ -n "$VOLUMES" ] && mount_volumes "$VOLUMES"
# various initial OS tweaks
setup_instance
# allow optional ssh keys, eg. via IAM for AWS
cb_sshkeys
# mount extra optional volumes
mount_volumes "$VOLUMES"
# add optional ssh keys, eg. via IAM for AWS
configure_sshd
set_hostname $CUSTOMHOSTNAME
# if fixed hostname use persistent sshd keys
[ -n "$CUSTOMHOSTNAME" ] && persistent_sshd_hostkeys "/_ssh/${ARTIFACT}/${CONGLOMERATE}/${HOSTNAME}"
eend $?
}
stop() {
source /usr/lib/cloudbender/init.sh
source /usr/lib/cloudbender/base.sh
source /usr/lib/cloudbender/volumes.sh
source /usr/lib/cloudbender/common.sh
get_meta_data
import_meta_data
[ -n "$VOLUMES" ] && unmount_volumes "$VOLUMES"
# Include dynamic config setting create at boot
[ -r /etc/cloudbender/rc.conf ] && . /etc/cloudbender/rc.conf
unmount_volumes "$VOLUMES"
[ -n "$DEBUG" ] && [ -r /tmp/shutdown.log ] && SHUTDOWNLOG="$(cat /tmp/shutdown.log)"
[ -n "$RC_REBOOT" ] && ACTION="rebooting" || ACTION="terminated"
[ -z "$DISABLE_SCALING_EVENTS" ] && /var/lib/cloudbender/sns_alarm.sh "Instance $ACTION" "" Info "$SHUTDOWNLOG"
[ -z "$DISABLE_SCALING_EVENTS" ] && /var/lib/cloud/sns_alarm.sh "Instance $ACTION" "" Info "$SHUTDOWNLOG"
eend $?
}

675
kubezero/zdt-base/common.sh Normal file
View File

@ -0,0 +1,675 @@
# We built on top of tiny-cloud
. /lib/tiny-cloud/common
IMDS_ENDPOINT="169.254.169.254"
. /lib/tiny-cloud/cloud/"$CLOUD"/imds
_imds() {
wget --quiet --timeout 1 --output-document - \
--header "$(_imds_header)" \
"http://$IMDS_ENDPOINT/$IMDS_URI/$1$IMDS_QUERY"
}
# Todo: This should go into a yaml file
query_imds() {
MAC=$(_imds meta-data/mac)
AVAILABILITY_ZONE=$(_imds meta-data/placement/availability-zone)
REGION=$(echo ${AVAILABILITY_ZONE} | sed "s/[a-z]$//")
INSTANCE_ID=$(_imds meta-data/instance-id)
cat <<EOF >> /var/lib/cloud/meta-data
AVAILABILITY_ZONE=$AVAILABILITY_ZONE
REGION=$REGION
INSTANCE_ID=$INSTANCE_ID
IP_ADDRESS=$(_imds meta-data/local-ipv4)
PUBLIC_IP_ADDRESS=$(_imds meta-data/public-ipv4 || true)
DEFAULT_GW_INTERFACE=$(ip -o route get 8.8.8.8 | awk '{print $5}')
MAC=$MAC
VPC_CIDR_RANGE=$(_imds meta-data/network/interfaces/macs/${MAC}/vpc-ipv4-cidr-block)
SUBNET=$(_imds meta-data/network/interfaces/macs/${MAC}/subnet-ipv4-cidr-block)
_META_HOSTNAME=$(_imds meta-data/hostname)
DOMAIN_NAME=\${_META_HOSTNAME#*.}
AWS_ACCOUNT_ID=$(_imds meta-data/network/interfaces/macs/${MAC}/owner-id)
INSTANCE_LIFE_CYCLE=$(_imds meta-data/instance-life-cycle)
INSTANCE_TYPE=$(_imds meta-data/instance-type)
EOF
}
# Todo: This should go into a yaml file
get_tags() {
# via metadata AWS restricts tags to NOT have " " or "/" ;-(
# Replace all /:.- with _ for valid variable names
for key in $(_imds meta-data/tags/instance); do
value="$(_imds meta-data/tags/instance/$key)"
key=$(echo ${key//[\/:.-]/_} | tr '[:lower:]' '[:upper:]')
echo "$key=\"$value\"" >> /var/lib/cloud/meta-data
done
#while read _key value; do
# key=$(echo ${_key//[\/:.-]/_} | tr '[:lower:]' '[:upper:]')
# echo "$key=\"$value\"" >> /var/lib/cloud/meta-data
#done < <(aws ec2 describe-tags --filters "Name=resource-id,Values=${INSTANCE_ID}" --query 'Tags[*].[Key,Value]' --region $REGION --output text)
}
# extract user-data args and cloud meta-data into /var/lib/cloud/meta-data
get_meta_data() {
if [ ! -f /var/lib/cloud/meta-data ]; then
echo '#!/bin/bash' > /var/lib/cloud/meta-data
query_imds
get_tags
fi
if [ ! -f /etc/cloudbender.conf ]; then
bash /var/lib/cloud/user-data extract_parameters
fi
}
import_meta_data() {
. /etc/cloudbender.conf
. /var/lib/cloud/meta-data
export AWS_DEFAULT_REGION=$REGION
export AWS_DEFAULT_OUTPUT=text
# some basic logic
if [ "$DEBUG" == "None" -o "$DEBUG" == "False" ]; then
unset DEBUG
LAUNCH_HOOK="CloudBenderLaunchHook"
fi
# Workaround for current CFN ASG_<parameter> hack
_key=$(echo $AWS_CLOUDFORMATION_LOGICAL_ID | tr '[:lower:]' '[:upper:]')
[ -n "$(eval echo \$${_key}_CUSTOMHOSTNAME)" ] && CUSTOMHOSTNAME="$(eval echo \$${_key}_CUSTOMHOSTNAME)"
[ -n "$(eval echo \$${_key}_VOLUMES)" ] && VOLUMES="$(eval echo \$${_key}_VOLUMES)"
return 0
}
# setup_instance, various OS tweaks impossible to do via AMI baking
setup_instance() {
# create machine-id to emulate systemd
[ -f /etc/machine-id ] || uuidgen > /etc/machine-id
# add and mount bpf file system
add_once /etc/fstab "bpffs /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0"
mount -a
# Ensure certain mounts are shared to run containers later, eg. cilium, falco
mount --make-shared /sys/fs/cgroup
mount --make-shared /sys/fs/bpf
mount --make-shared /sys
add_once /etc/hosts "${IP_ADDRESS} ${_META_HOSTNAME} ${HOSTNAME}"
# Set system wide default region for boto3
echo "export AWS_DEFAULT_REGION=$REGION" > /etc/profile.d/aws.sh
case "$CLOUD" in
aws)
setup_sns_alarms
;;
*)
ewarn "Unsupported Cloud: $CLOUD"
# return 1
;;
esac
}
################
# IAM SSH KEYS #
################
configure_sshd() {
# Change Listen port
local port=${SSHPORT:-"22"}
[ -w /etc/ssh/sshd_config ] && sed -i -e 's/^[\s#]*Port\s.*/Port '$port'/' /etc/ssh/sshd_config
case "$CLOUD" in
aws)
# on AWS call IAM for allowed groups and actual keys
local group=${SSHKEYIAMGROUP:-""}
local role=${SSHKEYIAMROLE:-"arn:aws:iam::000000000000:role/Undefined"}
[ $role == "arn:aws:iam::000000000000:role/Undefined" ] && role=""
if [ -n "$group" ]; then
# Configure SSHD
sed -i -e 's,^[\s#]*AuthorizedKeysCommand\s.*,AuthorizedKeysCommand /usr/sbin/get_iam_sshkeys.py --user %u --group '$group' --iamRole "'$role'",' /etc/ssh/sshd_config
sed -i -e 's,^[\s#]*AuthorizedKeysCommandUser\s.*,AuthorizedKeysCommandUser nobody,' /etc/ssh/sshd_config
ebegin "added $group to SSH admin keys"
fi
;;
*)
ewarn "Unsupported Cloud: $CLOUD"
# return 1
;;
esac
}
# Persist host keys
persistent_sshd_hostkeys() {
# Top level is artifact to be able to limit the SSM IAM permissions
local ssm_path=$1
local key_types="dsa ecdsa ed25519 rsa"
# If host keys exist on SSM try to download
RET=0
for key in $key_types; do
(aws ssm get-parameters --names "${ssm_path}/host_${key}.tgz" --with-decryption --query 'Parameters[0].Value' | base64 -d | tar xzf - --directory=/ 1>/dev/null 2>&1) \
&& log -t user-data info "Restored ssh_host_${key}_key from SSM" || RET=1
done
# Update keys if any key couldn't be restored from SSM
if [ $RET -eq 1 ]; then
for key in $key_types; do
if [ -r /etc/ssh/ssh_host_${key}_key -a -r /etc/ssh/ssh_host_${key}_key.pub ]; then
(aws ssm put-parameter --name "${ssm_path}/host_${key}.tgz" --type SecureString --value \
"$(tar czf - /etc/ssh/ssh_host_${key}_key /etc/ssh/ssh_host_${key}_key.pub | base64)" --overwrite) \
&& log -t user-data info "Uploaded ssh_host_${key}_key to SSM"
fi
done
fi
}
# either plain custom hostname or
# - `unique:<format_string>` eg. `uniq:kube-worker-{:02}` -> kube-worker-01
# - `myownip: <prefix>` eg. `myip: nodegroup-` -> nodegroup-1.2.3.4
set_hostname() {
local custom_hostname=$(echo $1 | awk -F. '{ print $1 }')
if [ -n "$custom_hostname" ]; then
if [[ "$custom_hostname" == unique:* ]]; then
new_hostname=$(uniq_hostname.py $AWS_EC2LAUNCHTEMPLATE_ID $INSTANCE_ID ${custom_hostname##unique:})
elif [[ "$custom_hostname" == myownip:* ]]; then
local _ip=$(echo $IP_ADDRESS | sed -e 's/\./-/g')
new_hostname=$(echo "${custom_hostname##myownip:}$_ip")
else
new_hostname=$custom_hostname
fi
FQDN="${new_hostname}.${DOMAIN_NAME}"
echo ${new_hostname} > /etc/hostname
hostname $new_hostname
export HOSTNAME=$new_hostname
# add new hostname to hosts
add_once /etc/hosts "${IP_ADDRESS} ${FQDN} ${new_hostname}"
log -t user-data info "Hostname updated to ${new_hostname}."
# hup syslog to update loghost macro
/etc/init.d/syslog-ng reload
# update Route53 entry for VPC internal FQDN
route53.py --fqdn $FQDN --record $IP_ADDRESS
# update our Name Tag to FQDN or PrivateDNSName to allow easy indentification in the AWS UI
aws ec2 create-tags --resources $INSTANCE_ID --tags Key=Name,Value=$FQDN
else
aws ec2 create-tags --resources $INSTANCE_ID --tags Key=Name,Value=${HOSTNAME}.${REGION}.compute.internal
fi
}
# various early volume functions
attach_ebs() {
local volId="$1"
local device="$2"
local tries=30
while true; do
_json="$(aws ec2 describe-volumes --volume-ids $volId --region $REGION --output json)"
rc=$?; [ $rc -ne 0 ] && return $rc
vol_status=$(echo "$_json" | jq -r .Volumes[].State)
attachedId=$(echo "$_json" | jq -r .Volumes[].Attachments[].InstanceId)
[ "$attachedId" = "$INSTANCE_ID" ] && break
if [ "$vol_status" = "available" ]; then
aws ec2 attach-volume --volume-id "$volId" --instance-id "$INSTANCE_ID" --region "$REGION" --device "$device" > /dev/null
rc=$?; [ $rc -ne 0 ] && return $rc
break
fi
# if attached but not to us -> detach
if [ "$vol_status" = "in-use" ]; then
aws ec2 detach-volume --volume-id "$volId" --region "$REGION" --force
rc=$?; [ $rc -ne 0 ] && return $rc
fi
((tries=tries-1))
[ $tries -eq 0 ] && return 1
sleep 5
done
}
_parse_volume() {
# Todo: proper checks once all is yaml
# For now just replace ':'
echo $1 | sed -e 's/:/ /g'
}
# mount optional remote volumes
mount_volumes() {
local volumes="$1"
for vol in $volumes; do
# Todo: check volume type and call matching func
read volType volId volDevice volPath < <(_parse_volume $vol)
[ "$volType" != "ebs" ] && { echo "Unknown volume type $volType"; break; }
attach_ebs $volId $volDevice
rc=$?
[ $rc -ne 0 ] && { ewarn "error trying to attach $volId"; break; }
# wait for the block device to become available
while true; do
mdev -s
test -b $volDevice && break
sleep 1
done
# check volume for existing filesystem
type=$(file -Lbs $volDevice)
if [[ "$type" =~ "XFS filesystem" ]]; then
xfs_repair $volDevice >/dev/null 2>&1
else
mkfs.xfs -qf $volDevice >/dev/null
fi
# mount
mkdir -p $volPath
mount -t xfs -o noatime $volDevice $volPath
ebegin "mounting $volDevice at $volPath"
done
}
unmount_volumes() {
local volumes="$1"
for vol in $volumes; do
read volType volId volDevice volPath < <(_parse_volume $vol)
umount $volPath && aws ec2 detach-volume --volume-id "$volId" --instance-id $INSTANCE_ID --region $REGION > /dev/null
done
}
# msg used for sns event, last one wins
msg() { MSG="$@"; log -t user-data info "$@"; }
# Generic retry command wrapper, incl. timeout of 30s
# $1 = number of tries; 0 = forever
# $2 = number of seconds to sleep between tries
# $@ actual command
retry() {
local tries=$1
local waitfor=$2
shift 2
while true; do
# Only use timeout of $1 is an executable, call directly if function
type -tf $1 >/dev/null && { timeout 30 $@ && return; } || { $@ && return; }
((tries=tries-1))
[ $tries -eq 0 ] && return 1
sleep $waitfor
done
}
add_swap() {
[ -f /.swapfile ] || { dd if=/dev/zero of=/.swapfile bs=1M count=$1 && chmod 600 /.swapfile && mkswap /.swapfile && swapon /.swapfile; }
grep -q "/.swapfile" /etc/fstab || echo "/.swapfile none swap sw 0 0" >> /etc/fstab
sysctl -w vm.swappiness=10
}
# Get SSM secure string base64 decoded
# $0 SSM_PATH, value to stdout
get_secret() {
aws ssm get-parameter --name ${1,,} --with-decryption --query 'Parameter.Value' | base64 -d
}
# Store values as base64 on SSM
# $0 SSM_PATH VALUE
put_secret() {
aws ssm put-parameter --name ${1,,} --type SecureString --value "$(echo "$2" | base64 -w0)" --overwrite
}
# Gets existing passphrase or creates new passphrase and stores it
init_passphrase() {
local _URL=$1
local _PPFILE=$2
# If secret already exists noop
[ -f $_PPFILE ] && return 0
get_secret $_URL > $_PPFILE && chmod 600 $_PPFILE || \
{ xxd -l16 -p /dev/random > $_PPFILE; chmod 600 $_PPFILE; put_secret $_URL "$(cat $_PPFILE)"; }
}
asg_heartbeat() {
[ -n "$LAUNCH_HOOK" ] && aws autoscaling record-lifecycle-action-heartbeat --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name $AWS_AUTOSCALING_GROUPNAME || true
}
setup_sns_alarms() {
# store SNS message json template
cat <<EOF > /var/lib/cloud/sns_alarm.json
{
"Source": "CloudBender",
"AWSAccountId": "$AWS_ACCOUNT_ID",
"Region": "$REGION",
"Artifact": "$ARTIFACT",
"Asg": "$AWS_AUTOSCALING_GROUPNAME",
"Instance": "$INSTANCE_ID",
"ip": "$IP_ADDRESS"
}
EOF
cat <<'EOF' > /var/lib/cloud/sns_alarm.sh
#!/bin/bash
SUBJECT=$1
MSG=$2
LEVEL=${3:-Info}
ATTACHMENT=${4:-""}
EMOJI=${5:-""}
EOF
if [ -n "$ALARMSNSARN" ]; then
cat <<EOF >> /var/lib/cloud/sns_alarm.sh
jq -M --arg subject "\$SUBJECT" --arg level "\$LEVEL" --arg msg "\$MSG" --arg attachment "\$ATTACHMENT" --arg emoji "\$EMOJI" --arg hostname "\$HOSTNAME" '.Subject = \$subject | .Level = \$level | .Message = \$msg | .Attachment = \$attachment | .Emoji = \$emoji | .Hostname = \$hostname' < /var/lib/cloud/sns_alarm.json | sed -e 's/\\\\\\\\/\\\\/g' > /tmp/sns.json
aws sns publish --region ${REGION} --target-arn $ALARMSNSARN --message file:///tmp/sns.json
EOF
fi
chmod +x /var/lib/cloud/sns_alarm.sh
}
exit_trap() {
set +e
trap - ERR EXIT
local ERR_CODE=$1
local ERR_LINE="$2"
local ERR_FUNC="$3"
local ERR_CMD="$4"
if [ $ERR_CODE -ne 0 ]; then
CFN_STATUS="FAILURE"
RESULT="ABANDON"
else
CFN_STATUS="SUCCESS"
RESULT="CONTINUE"
fi
# Add SNS events on demand
if [ -n "$ALARMSNSARN" ]; then
if [ $ERR_CODE -ne 0 ]; then
LEVEL="Error"
SUBJECT="Error during cloud-init."
if [ $ERR_LINE -ne 1 ]; then
MSG="$ERR_CMD failed in $ERR_FUNC at $ERR_LINE. Return: $ERR_CODE"
ATTACHMENT="$(pr -tn $0 | tail -n+$((ERR_LINE - 3)) | head -n7)"
else
MSG="$ERR_CMD"
fi
if [ -n "$DEBUG" ]; then
SUBJECT="$SUBJECT Instance kept running for debug."
else
SUBJECT="$SUBJECT Instance terminated by ASG lifecycle hook."
fi
else
LEVEL="Info"
SUBJECT="ZDT Alpine Instance launched."
fi
if [ -z "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] || [ "$LEVEL" != "Info" ]; then
/var/lib/cloud/sns_alarm.sh "$SUBJECT" "$MSG" "$LEVEL" "$ATTACHMENT"
fi
# Disable scaling events during shutdown
[ -n "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] && echo "DISABLE_SCALING_EVENTS=1" >> /etc/cloudbender.conf
fi
[ -n "$LAUNCH_HOOK" ] && aws autoscaling complete-lifecycle-action --lifecycle-action-result $RESULT --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name ${AWS_AUTOSCALING_GROUPNAME} || true
if [ -n "${AWS_CLOUDFORMATION_LOGICAL_ID}" ]; then
aws cloudformation signal-resource --stack-name ${AWS_CLOUDFORMATION_STACK_NAME} --logical-resource-id ${AWS_CLOUDFORMATION_LOGICAL_ID} --unique-id ${INSTANCE_ID} --status ${CFN_STATUS}
fi
# timestamp being done
end_uptime=$(awk '{print $1}' < /proc/uptime)
log -t user-data info "Exiting user-data. Duration: $(echo "$end_uptime-$start_uptime" | bc) seconds"
# Shutdown / poweroff if we ran into error and not DEBUG
[ $ERR_CODE -ne 0 -a -z "$DEBUG" ] && poweroff
exit 0
}
### S3 based locking
MUTEX=mutex
MUTEX_OWNER=$HOSTNAME
MUTEX_TIMEOUT=600
release_lock() {
local S3LOCK=$1
rm -f $MUTEX
aws s3 rm $S3LOCK
}
# Lock not timed out and we own it: 0
# Lock not timed out and someone else owns it: 1
# Lock timed out: 2
verify_lock() {
local S3LOCK=$1
aws s3 cp $S3LOCK $MUTEX
_host=$(grep "MUTEX_OWNER=" $MUTEX | sed -e 's/MUTEX_OWNER=//')
_time=$(grep "MUTEX_TIME=" $MUTEX | sed -e 's/MUTEX_TIME=//')
# Check for timestamp and timeout
let timepassed=$(date -u +%s)-$_time
[ $timepassed -gt $MUTEX_TIMEOUT ] && return 2
[ "$_host" == "$MUTEX_OWNER" ] && return 0
return 1
}
aquire_lock() {
local S3LOCK=$1
echo "MUTEX_OWNER=${MUTEX_OWNER}" > $MUTEX
echo "MUTEX_TIME=$(date -u +%s)" >> $MUTEX
aws s3 cp $MUTEX $S3LOCK
# verify we actually got the lock
sleep 2
verify_lock $S3LOCK
}
check_lock() {
local S3LOCK=$1
aws s3 ls $S3LOCK && rc=$? || rc=$?
# No LOCK ? -> get it !
if [ $rc -ne 0 ]; then
aquire_lock $S3LOCK
else
verify_lock $S3LOCK && rc=$? || rc=$?
# Lock timeout -> we get it
if [ $rc -eq 2 ]; then
aquire_lock $S3LOCK
# we already own it
elif [ $rc -eq 0 ]; then
return 0
# someone else has a valid lock
else
return 1
fi
fi
}
# All things networking
enable_ip_forwarding() {
local interface=$1
modprobe nf_conntrack
cat <<EOF > /etc/sysctl.d/40-ip-forward.conf
net.ipv4.ip_forward=1
net.ipv4.ip_local_port_range = 1024 65535
net.ipv4.conf.$interface.send_redirects=0
net.ipv4.conf.all.rp_filter = 1
net.ipv4.conf.all.accept_redirects = 0
net.ipv6.conf.default.forwarding = 1
net.ipv6.conf.all.forwarding = 1
net.netfilter.nf_conntrack_max = 524288
EOF
sysctl -p /etc/sysctl.d/40-ip-forward.conf
}
enable_iptables() {
rc-update add iptables
/etc/init.d/iptables save
}
register_service_dns() {
if [ -n "$DNSZONE" -a -n "$SERVICENAME" ]; then
_IP=${PUBLIC_IP_ADDRESS:-$IP_ADDRESS}
[ -n "SERVICEPRIVATE" ] && _IP=$IP_ADDRESS
route53.py --fqdn "${SERVICENAME}.${DNSZONE}" --record $_IP
cat <<EOF >> /etc/local.d/route53.stop
echo "Deleting Route53 record for ${SERVICENAME}.${DNSZONE}" >> /tmp/shutdown.log
route53.py --delete --fqdn "${SERVICENAME}.${DNSZONE}" --record ${PUBLIC_IP_ADDRESS:-$IP_ADDRESS}
EOF
chmod +x /etc/local.d/route53.stop
# Short cut our public IP to private one to allow talking to our own service name
add_once /etc/hosts "${IP_ADDRESS} ${SERVICENAME}.${DNSZONE}"
fi
}
# associate EIP
# return 0 if we attache EIP
# return 1 if we public did NOT change
associate_eip() {
local instance_id=$1
local eip=$(echo $2 | sed -e 's/\/32//' | grep -E -o "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)") || true
local current_instance
if [ -n "$eip" ]; then
if [ "$eip" != "0.0.0.0" ]; then
read eip_alloc_id eip_assoc_id current_instance < <(aws ec2 describe-addresses --public-ips $eip --query 'Addresses[*].[AllocationId,AssociationId,InstanceId]' || true)
# If we already own and have the EIP attached -> done
[ "$instance_id" == "$current_instance" ] && return
if [ ! -z "$eip_alloc_id" ]; then
if [[ "$eip_assoc_id" =~ ^eipassoc- ]]; then
log -t user-data info "EIP $eip already associated via Association ID ${eip_assoc_id}. Disassociating."
retry 3 10 aws ec2 disassociate-address --association-id $eip_assoc_id
fi
log -t user-data info "Associating Elastic IP $eip via Allocation ID $eip_alloc_id with Instance $instance_id"
aws ec2 associate-address --no-allow-reassociation --instance-id $instance_id --allocation-id $eip_alloc_id
return
else
log -t user-data warn "Elastic IP $eip address not found."
fi
else
log -t user-data info "0.0.0.0 requested, keeping AWS assigned IP."
fi
else
log -t user-data debug "Invalid or no ElasticIP defined. Skip"
fi
return 1
}
# Accept incoming traffic for everything
disable_source_dest_check() {
aws ec2 modify-instance-attribute --instance-id ${INSTANCE_ID} --source-dest-check "{\"Value\": false}"
}
# Register ourself at route tables
register_routes() {
local rtb_id_list=$1
local route_cidr=${2:-"0.0.0.0/0"}
for cidr in ${route_cidr//,/ }; do
for rt in ${rtb_id_list//,/ }; do
[[ "$rt" =~ ^rtb-[a-f0-9]*$ ]] || { log -t user-data warn "Invalid Route Table ID: $rt"; return 1; }
aws ec2 create-route --route-table-id $rt --destination-cidr-block "${cidr}" --instance-id ${INSTANCE_ID} || \
aws ec2 replace-route --route-table-id $rt --destination-cidr-block "${cidr}" --instance-id ${INSTANCE_ID}
done
done
}
setup_nat() {
local mode=$1
# Masquerade all outgoing traffic
iptables -t nat -A POSTROUTING -o $DEFAULT_GW_INTERFACE -s ${VPC_CIDR_RANGE} -j MASQUERADE
}
setup_fluentbit() {
local token="cloudbender"
if [[ $FLUENTDURL == *@* ]]; then
token=${FLUENTDURL%%@*}
FLUENTD_URL=${FLUENTDURL##*@}
fi
LOG_FILES=$FLUENTDLOGFILES
# Add a local file based syslog parser which does not require Priority
cat <<EOF > /etc/fluent-bit/metadata.conf
# add some AWS metadata
[FILTER]
Name record_modifier
Match *
Record source.ip $IP_ADDRESS
Record source.instance_id $INSTANCE_ID
Record source.region $REGION
Record source.account $AWS_ACCOUNT_ID
Record source.conglomerate $CONGLOMERATE
Record source.artifact $ARTIFACT
EOF
# install logrotate fragment
cat <<EOF > /etc/logrotate.d/fluentbit
/var/log/fluentbit.log
{
rotate 3
missingok
notifempty
compress
maxsize 10M
daily
postrotate
rc-service fluent-bit restart
endscript
}
EOF
rc-update add fluent-bit default
rc-service fluent-bit start
}

View File

@ -6,6 +6,6 @@ shift
ATTACHMENT="$@"
if [ -n "${MONIT_SERVICE}${MONIT_EVENT}" -a -n "$MONIT_DESCRIPTION" ]; then
[ -x /var/lib/cloudbender/sns_alarm.sh ] && \
/var/lib/cloudbender/sns_alarm.sh "$MONIT_SERVICE - $MONIT_EVENT" "$MONIT_DESCRIPTION" "$LEVEL" "$ATTACHMENT"
[ -x /var/lib/cloud/sns_alarm.sh ] && \
/var/lib/cloud/sns_alarm.sh "$MONIT_SERVICE - $MONIT_EVENT" "$MONIT_DESCRIPTION" "$LEVEL" "$ATTACHMENT"
fi

View File

@ -1,6 +1,6 @@
# Give instance 3 min to settle after boot
set daemon 30
with start delay 180
# add `for 2 cycles` might be better than this intial block
# with start delay 120
set log syslog

View File

@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# use pyminify: pyminifier --obfuscate-variables $0 > minified_$0

View File

@ -13,12 +13,15 @@ echo 'enabled cgroupv2, openRC logging'
#sed -i -e 's/^[\s#]*rc_parallel=.*/rc_parallel="YES"/' /etc/rc.conf
#echo 'enable parallel openRC'
# load falco kernel module at boot
grep -q falco /etc/modules || echo falco >> /etc/modules
# Setup syslog-ng json logging and apparmor tweaks
cp /lib/zdt/syslog-ng.conf /etc/syslog-ng/syslog-ng.conf
cp /lib/zdt/syslog-ng.logrotate.conf /etc/logrotate.d/syslog-ng
cp /lib/zdt/syslog-ng.apparmor /etc/apparmor.d/local/sbin.syslog-ng
mv /etc/periodic/daily/logrotate /etc/periodic/hourly/
[ -f /etc/periodic/daily/logrotate ] && mv /etc/periodic/daily/logrotate /etc/periodic/hourly/
echo 'syslog-ng: all to /var/log/messages as json, rotate hourly'
# use init to spawn monit
@ -26,5 +29,5 @@ echo ":2345:respawn:/usr/bin/monit -Ic /etc/monitrc.zdt" >> /etc/inittab
echo 'Enable monit via inittab'
# QoL
mv /etc/profile.d/color_prompt.sh.disabled /etc/profile.d/color_prompt.sh || true
[ -f /etc/profile.d/color_prompt.sh.disabled ] && mv /etc/profile.d/color_prompt.sh.disabled /etc/profile.d/color_prompt.sh || true
echo 'alias rs="doas bash"' > /etc/profile.d/alias.sh

10
scripts/rebuild_new_kernel.sh Executable file
View File

@ -0,0 +1,10 @@
#!/bin/bash
PACKETS="falco-kernel nvidia-open-gpu aws-neuron-driver"
for p in $PACKETS; do
rm -f packages/kubezero/*/$p*.apk
make apk PKG=kubezero/$p/APKBUILD
done
make upload