diff --git a/Makefile b/Makefile index b3b93c9..1d9fd8a 100644 --- a/Makefile +++ b/Makefile @@ -47,13 +47,13 @@ apk: packages distfiles download: aws s3 sync s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/x86_64/ packages/kubezero/x86_64/ --exclude APKINDEX.tar.gz - aws s3 sync s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ packages/kubezero/aarch64/ --exclude APKINDEX.tar.gz + #aws s3 sync s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ packages/kubezero/aarch64/ --exclude APKINDEX.tar.gz invalidate_cdn: aws cloudfront create-invalidation --distribution $(CF_DIST) --paths "/alpine/*" upload: invalidate_cdn aws s3 sync --delete packages/kubezero/x86_64/ s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/x86_64/ --exclude APKINDEX.tar.gz - aws s3 sync --delete packages/kubezero/aarch64/ s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ --exclude APKINDEX.tar.gz + #aws s3 sync --delete packages/kubezero/aarch64/ s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ --exclude APKINDEX.tar.gz aws s3 cp packages/kubezero/x86_64/APKINDEX.tar.gz s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/x86_64/ --cache-control max-age=1 - aws s3 cp packages/kubezero/aarch64/APKINDEX.tar.gz s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ --cache-control max-age=1 + #aws s3 cp packages/kubezero/aarch64/APKINDEX.tar.gz s3://zero-downtime-web-cdn/alpine/$(RELEASE)/kubezero/aarch64/ --cache-control max-age=1 diff --git a/kubezero/aws-iam-authenticator/APKBUILD b/kubezero/aws-iam-authenticator/APKBUILD index 86bc2ca..458149e 100644 --- a/kubezero/aws-iam-authenticator/APKBUILD +++ b/kubezero/aws-iam-authenticator/APKBUILD @@ -1,7 +1,7 @@ # Contributor: Stefan Reimer # Maintainer: Stefan Reimer pkgname=aws-iam-authenticator -pkgver=0.6.2 +pkgver=0.6.10 pkgrel=0 pkgdesc="AWS aws-iam-authenticator" url="https://github.com/kubernetes-sigs/aws-iam-authenticator" @@ -20,5 +20,5 @@ package() { } sha512sums=" -4789fe7c11d4d1b94da5f35844a0da8e62da743bef3fc13f668c542f3dbc83584ef29abbcebc6f4651aad8ecbd9195d6bfc13476c7dd4a1d34ed11822652fc5e aws-iam-authenticator-0.6.2.tar.gz +2b5da6dfbec1f5483ead8da280de8dd719b71157a9bfa4751c015dbc77a4f4c01a59486015cd2231ffb4232a0bf4a35ef843007605dd0b9fffd51ca0208f8fda aws-iam-authenticator-0.6.10.tar.gz " diff --git a/kubezero/cri-o/APKBUILD b/kubezero/cri-o/APKBUILD index daf1408..56222b1 100644 --- a/kubezero/cri-o/APKBUILD +++ b/kubezero/cri-o/APKBUILD @@ -3,7 +3,7 @@ # Contributor: TBK # Maintainer: ungleich pkgname=cri-o -pkgver=1.26.3 +pkgver=1.26.4 pkgrel=0 pkgdesc="OCI-based implementation of Kubernetes Container Runtime Interface" url="https://github.com/cri-o/cri-o/" @@ -52,6 +52,7 @@ source=" makefile-fix-install.patch fix-test.patch remove-systemd-files.patch + crictl.yaml " # secfixes: @@ -89,15 +90,18 @@ package() { install -Dm755 "$srcdir"/crio.initd "$pkgdir"/etc/init.d/crio install -Dm644 "$srcdir"/crio.conf "$pkgdir"/etc/crio/crio.conf install -Dm644 "$srcdir"/crio.logrotated "$pkgdir"/etc/logrotate.d/crio + + install -Dm644 "$srcdir"/crictl.yaml "$pkgdir"/etc/crictl.yaml } sha512sums=" -58718db358d35b61e5edb8a16185bc534337a1ebfaf0d40ab17efb73c266fb2c337fad3cf92a7d8bcc7a02c4e2180b2b79a5896eb635b43334bcc1366b12baf8 cri-o-1.26.3.tar.gz +99bf6b438da236491fcc33ddaa28aeb381fc40c04138918be98fca1117132c5616598e4d758a6852071a67e4884895494b091c9206490a964a559723f77b84e7 cri-o-1.26.4.tar.gz 
1f60719677295c9c5c615eb25d9159bde0af68a132eee67747f57fe76642d457c98c896c6189f85637d7b4ac24ba55fd9eaeb1699f43c3c5077b645f72a479fb crio.conf -26048a219bc426ef4a4f50e96d6e0ded1c55dc8415df9a2773764d2ebcb3d9e91077b2916da1ff32674ca4a53062e41e185503d671dacc3167a018b0066347e1 crio.initd +cfc4c144931400023e6642fa0b9880f0e3c09c187542905ca56044cedafb5e1f1d49708e4352233abee4e02181155c02fc9688bf93202fc8d80dfc1ffc90699b crio.initd 1115228546a696eeebeb6d4b3e5c3152af0c99a2559097fc5829d8b416d979c457b4b1789e0120054babf57f585d3f63cbe49949d40417ae7aab613184bf4516 crio.logrotated 0a567dfa431ab1e53f2a351689be8d588a60cc5fcdbda403ec4f8b6ab9b1c18ad425f6c47f9a5ab1491e3a61a269dc4efa6a59e91e7521fa2b6bb165074aa8e0 cni-plugins-path.patch f9577aa7b1c90c6809010e9e406e65092251b6e82f6a0adbc3633290aa35f2a21895e1a8b6ba4b6375dcad3e02629b49a34ab16387e1c36eeb32c8f4dac74706 makefile-fix-install.patch 1c1bfa5feeb0c5ddc92271a5ef80edc38d56afa1574ffc124605d5bb227a407b55dd5268df6cebc6720768ac31245e08b7950e5ab2b7f14ba934c94f1e325f86 fix-test.patch 78c150f87027de489289596371dce0465159ced0758776b445deb58990e099de9c654406183c9da3cc909878b24d28db62121b7056cd180a6f2820e79e165cc6 remove-systemd-files.patch +79e1a7c6183ba56f55d923e9d738be945564494042bc011d31e9195f66c268d702ee5c86711d4b46618285fc1b10b59ea55c321390feca770cfc7de334e103bd crictl.yaml " diff --git a/kubezero/cri-o/crictl.yaml b/kubezero/cri-o/crictl.yaml new file mode 100644 index 0000000..30f6de3 --- /dev/null +++ b/kubezero/cri-o/crictl.yaml @@ -0,0 +1,5 @@ +# runtime entpoint, see https://github.com/kubernetes/enhancements/issues/2371 might be fixed with 1.29 ? +runtime-endpoint: unix:///run/crio/crio.sock +image-endpoint: unix:///var/run/crio/crio.sock +timeout: 60 +debug: false diff --git a/kubezero/cri-o/crio.initd b/kubezero/cri-o/crio.initd index c5eec65..3a1ac9a 100755 --- a/kubezero/cri-o/crio.initd +++ b/kubezero/cri-o/crio.initd @@ -8,7 +8,9 @@ extra_started_commands="reload" command="/usr/bin/${RC_SVCNAME}" command_args="${command_args}" command_background="true" -start_stop_daemon_args=" \ + +# We run all containers with nice level 1 +start_stop_daemon_args="-N 1 \ --stdout /var/log/${RC_SVCNAME}/${RC_SVCNAME}.log \ --stderr /var/log/${RC_SVCNAME}/${RC_SVCNAME}.log" diff --git a/kubezero/kubernetes/APKBUILD b/kubezero/kubernetes/APKBUILD index c6976be..ff4b6f9 100644 --- a/kubezero/kubernetes/APKBUILD +++ b/kubezero/kubernetes/APKBUILD @@ -5,7 +5,7 @@ # Contributor: Dave # Maintainer: Stefan Reimer pkgname=kubernetes -pkgver=1.26.6 +pkgver=1.26.7 pkgrel=0 pkgdesc="Container Cluster Manager" url="https://kubernetes.io/" @@ -208,7 +208,7 @@ _do_zshcomp() { } sha512sums=" -02a6ce3c0ec44dce5b03b97d4a544ac96d43a2342f172c176cbe83b269ffec4c3704f01f620eb11cfdc61728f8431ab6ffdbecc21e0cb29cb388b62c1a930cdf kubernetes-1.26.6.tar.gz +9069e653e87883e54df8e01edf2cce9d847a83d593f13e8281654653924586e73841d1ee302de4de93dadf2a2474e875cf350f03c2aec512c100cb3d4fb7d9c5 kubernetes-1.26.7.tar.gz 5427c2e653504cfd5b0bcaf195d4734ee40947ddfebc9f155cd96dddccfc27692c29d94af4ac99f1018925b52995c593b584c5d7a82df2f185ebce1a9e463c40 make-e2e_node-run-over-distro-bins.patch 94d07edfe7ca52b12e85dd9e29f4c9edcd144abc8d120fb71e2a0507f064afd4bac5dde30da7673a35bdd842b79a4770a03a1f3946bfae361c01dd4dc4903c64 make-test-cmd-run-over-hyperkube-based-kubectl.patch e690daff2adb1013c92124f32e71f8ed9a18c611ae6ae5fcb5ce9674768dbf9d911a05d7e4028488cda886e63b82e8ac0606d14389a05844c1b5538a33dd09d1 kube-apiserver.initd @@ -223,7 +223,7 @@ d7e022ee22da191bda7382f87cb293d9c9d115a3df0c2054bf918279eb866f99c6d5c21e4c98eae8 
561bef5633ba4b9021720624443d9c279a561e5fabea76e5d0fbee2e7ad8999029a2511a45895fbec8448026212a3c5b4c197b248a6afa7f8bd945f705524ea7 kube-scheduler.initd af88b382ab75657d0ff13c3f8f6d924cef9f2df7807a9a27daa63495981801bc4b607998f65c0758c11a7e070e43c24f7184ba7720711109c74b1c4d57919e34 kube-scheduler.confd 3692da349dd6ed0f5acc09d7b95ac562ffecb103e2270bebdfe4a7808d48dada9d2debff262d85b11c47f9ca3f0c20000712d03629ed813ff08a3e02d69267e6 kube-scheduler.logrotated -1b0e0cc45666b18ecb333bf3835b282e9f72e40bf29b7a9d7b9e5e2bbbd009297be8c31d284e865af45d66434a27dee14e617e61fac0dda0242d7d0f3fc89ce8 kubelet.initd -f79ea6dec632ca052f8cd061bf0c5f7f821c7013c048b4737752e1a41c677c020c1f9454ddabe7f9ba5d55c2a8a4718170e30049b7212a4a2dc91148a3ac7ebc kubelet.confd +73fdb0303e72c006f4570af28312ecee224beb1d6cc1e19003593af377436b4082f6d49bd25cd9cae258ffa01bc9f2f0624d11ef0ecc64c658761888923be812 kubelet.initd +887ee5b4c67198727407e74c92639b23674515d5f049938f8ce5f3ba2eabcf7f321c00c914b254a7b2baa5c2f45a9ae4a945c9c90f1968f1012dbd60245d1b81 kubelet.confd 941f4a7579dcf78da2d323ac69195e95eba6600e6fcefe9231447f11c9867a7aa57b4189ee1fefb10eab19c89665ea2e7696b539c92e99fbcde905d2ff85be58 kubelet.logrotated " diff --git a/kubezero/kubernetes/kubelet.confd b/kubezero/kubernetes/kubelet.confd index 3d40778..b924610 100644 --- a/kubezero/kubernetes/kubelet.confd +++ b/kubezero/kubernetes/kubelet.confd @@ -1 +1,2 @@ -command_args="--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --cgroup-driver=cgroupfs --config=/var/lib/kubelet/config.yaml" +command_args="--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --config=/var/lib/kubelet/config.yaml --image-credential-provider-bin-dir=/usr/libexec/kubernetes/kubelet-plugins --image-credential-provider-config=/etc/kubernetes/credential-provider.yaml" +rc_after="cloudbender" diff --git a/kubezero/kubernetes/kubelet.initd b/kubezero/kubernetes/kubelet.initd index d1bb2bb..7cc9880 100755 --- a/kubezero/kubernetes/kubelet.initd +++ b/kubezero/kubernetes/kubelet.initd @@ -5,6 +5,10 @@ supervisor=supervise-daemon description="Kubelet, a Kubernetes node agent" +# do not start without kubezero node config in place +required_files="/var/lib/kubelet/kubeadm-flags.env /var/lib/kubelet/config.yaml" +respawn_max=0 + if [ -e /var/lib/kubelet/kubeadm-flags.env ]; then . 
/var/lib/kubelet/kubeadm-flags.env; fi diff --git a/kubezero/kubezero/APKBUILD b/kubezero/kubezero/APKBUILD index 4191dcf..e02d579 100644 --- a/kubezero/kubezero/APKBUILD +++ b/kubezero/kubezero/APKBUILD @@ -15,7 +15,7 @@ depends=" kubelet~$pkgver kubectl~$pkgver ecr-credential-provider~$pkgver - aws-iam-authenticator~0.6.2 + aws-iam-authenticator~0.6.10 " options="!check" #install="$pkgname.post-install" @@ -27,10 +27,13 @@ subpackages=" source=" shared-sys-fs.start evictLocalNode.sh + credential-provider.yaml + kubelet.monit + crio.conf " IMAGES=" - quay.io/cilium/cilium:v1.13.4 + quay.io/cilium/cilium:v1.13.5 ghcr.io/k8snetworkplumbingwg/multus-cni:v3.9.3 " @@ -43,11 +46,16 @@ build() { } package() { - # make /sys shared - install -Dm755 "$srcdir"/shared-sys-fs.start "$pkgdir/etc/local.d/shared-sys-fs.start" - # drain local node install -Dm755 "$srcdir"/evictLocalNode.sh "$pkgdir/usr/bin/evictLocalNode.sh" + + mkdir -p $pkgdir/etc/kubernetes/manifests + install -Dm644 "$srcdir"/credential-provider.yaml "$pkgdir/etc/kubernetes/credential-provider.yaml" + + install -Dm644 "$srcdir"/kubelet.monit "$pkgdir/etc/monit.d/kubelet.conf" + + # crio settings + install -Dm644 "$srcdir"/crio.conf "$pkgdir/etc/crio.conf.d/01-kubezero.conf" } # Preload container images all nodes need to speed up boot time and reduce data transfer @@ -63,4 +71,7 @@ imagecache() { sha512sums=" ecb33fc3a0ffc378723624858002f9f5e180e851b55b98ab6611ecc6a73d4719bc7de240f87683fc58de8bf577059e6f19b417655b5301ef8c32deff67a29dff shared-sys-fs.start fce1013f7b1bfa8ee526de62e642a37fda3168889723e873d3fb69e257f4caa1423b5a14b9343b12a87f3b6f93c7d3861b854efda67ef2d6a42a5ca8cf3d1593 evictLocalNode.sh +716ec3404d7016bce57d663f750a18db3ede07c1ba7a2908f9f01f41c5ca8fe4e7232ded27bc2bccd705b11ae5cd26574322a8eacefcf8c102bba0f8e4995e59 credential-provider.yaml +abf062fbb2b94831f5321265a648bd17ddbb198e446e763d64d0cc8e3b7320e1545376cfa57b1491bb296ace28f1623439807a4157a2f32984082e565e2edcba kubelet.monit +064fc245b7ffd67834a2f5fd13cb0bcb5f4a5caf79b8113b3669bf1d0e1a4af2042e69f8f496991de76d621fd01bc7e67de37c59f034584d12622c6af96376ff crio.conf " diff --git a/kubezero/kubezero/credential-provider.yaml b/kubezero/kubezero/credential-provider.yaml new file mode 100644 index 0000000..7ab0dca --- /dev/null +++ b/kubezero/kubezero/credential-provider.yaml @@ -0,0 +1,12 @@ +apiVersion: kubelet.config.k8s.io/v1alpha1 +kind: CredentialProviderConfig +providers: + - name: ecr-credential-provider + matchImages: + - "*.dkr.ecr.*.amazonaws.com" + - "*.dkr.ecr.*.amazonaws.cn" + - "*.dkr.ecr-fips.*.amazonaws.com" + - "*.dkr.ecr.us-iso-east-1.c2s.ic.gov" + - "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov" + defaultCacheDuration: "12h" + apiVersion: credentialprovider.kubelet.k8s.io/v1alpha1 diff --git a/kubezero/kubezero/crio.conf b/kubezero/kubezero/crio.conf new file mode 100644 index 0000000..6c1b095 --- /dev/null +++ b/kubezero/kubezero/crio.conf @@ -0,0 +1,8 @@ +[crio.metrics] +enable_metrics = true + +[crio.runtime] +default_ulimits = [ "nofile=65535:65535", "memlock=-1:-1" ] + +[crio.network] +cni_default_network="multus-cni-network" diff --git a/kubezero/kubezero/kubelet.monit b/kubezero/kubezero/kubelet.monit new file mode 100644 index 0000000..4c09368 --- /dev/null +++ b/kubezero/kubezero/kubelet.monit @@ -0,0 +1,9 @@ +check process kubelet pidfile /run/kubelet.pid + start program = "/etc/init.d/kubelet start" + stop program = "/etc/init.d/kubelet stop" + if failed + port 10248 + protocol http + request "/healthz" + for 2 cycles + then restart diff --git 
a/kubezero/nvidia-open-gpu/APKBUILD b/kubezero/nvidia-open-gpu/APKBUILD index c7af6ad..7e64752 100644 --- a/kubezero/nvidia-open-gpu/APKBUILD +++ b/kubezero/nvidia-open-gpu/APKBUILD @@ -1,7 +1,7 @@ # Contributor: Stefan Reimer # Maintainer: Stefan Reimer pkgname=nvidia-open-gpu -#pkgver=535.54.03 +#pkgver=535.86.05 pkgver=525.125.06 pkgrel=0 pkgdesc="NVIDIA Linux open GPU kernel modules" diff --git a/kubezero/zdt-base/APKBUILD b/kubezero/zdt-base/APKBUILD index be4e2f4..6f869f6 100644 --- a/kubezero/zdt-base/APKBUILD +++ b/kubezero/zdt-base/APKBUILD @@ -13,10 +13,8 @@ subpackages="$pkgname-openrc $pkgname-aws" install="$pkgname.post-install" source=" - cb_base.sh - cb_lock.sh - cb_volumes.sh - cb_init.sh + common.sh + boot.sh cloudbender-early.init cloudbender.init zdt-sysctl.conf @@ -40,13 +38,11 @@ build() { package() { # core libraries - install -Dm755 "$srcdir/cb_base.sh" "$pkgdir/usr/lib/cloudbender/base.sh" - install -Dm755 "$srcdir/cb_lock.sh" "$pkgdir/usr/lib/cloudbender/lock.sh" - install -Dm755 "$srcdir/cb_volumes.sh" "$pkgdir/usr/lib/cloudbender/volumes.sh" - install -Dm755 "$srcdir/cb_init.sh" "$pkgdir/usr/lib/cloudbender/init.sh" + install -Dm755 "$srcdir/boot.sh" "$pkgdir/usr/lib/cloudbender/boot.sh" + install -Dm755 "$srcdir/common.sh" "$pkgdir/usr/lib/cloudbender/common.sh" # convienience - mkdir -p "$pkgdir"/etc/cloudbender "$pkgdir/home/alpine" + mkdir -p "$pkgdir/home/alpine" install -Dm644 "$srcdir/profile" "$pkgdir/home/alpine/.profile" # various sysctl tunings @@ -89,23 +85,21 @@ aws() { } sha512sums=" -13944b955f3c2db54674cd84d58950fc9d8ca522d96081f05ed9bfef8289e30c3dd508796c94d9b283b9c168aadc70f9a934442ec48a5c0c9f25578e62ea0b78 cb_base.sh -3e02b858680d751b2c1fb22feeec2b767e328fdf007d9fb6687f309e62630467e982cc33d47c4417311938d35bb17dc210f9e0a40b90298dc22cf142f022c124 cb_lock.sh -d7bb357e25aa797b68185036814f1b34d4d6b098de43ef0cf3b71011ebee3d6c2e4bd8fa73a5c7dd6f37c6bbbf3c9c344461a9507346104d4fe783dd1f8b2e23 cb_volumes.sh -4ccae4451de8fa83d1a73182ad539ca218a98f68f0bbfe2fc4d99ade75e802e3baa65b4a0e00ae2a0b3c870e6467b219d1c5a22e04f3930c3efd7de6c3cf38ab cb_init.sh -9c688e08e44ae965eaa6021e005f65077690c30fe6b2de7371c57ae3a53a18f12a356587261c950b8c900f73cb35af3ba7795d33181532b485aeee3b6ca41757 cloudbender-early.init -7fea4ed0ebcf781ae197962c9f6d287c66e7380c758126a1b46c711baf3c7e93d6ccf1d536dada590ca02a7f7b271fd5ecb0dcb4854772cc8b1b70d379249f65 cloudbender.init +a99d8fada2ce90876abbd84d8f72c976d1363e0b1437952aee8b22983b7bc7492803950bcc4dfb9866fcf744b9b6056bdbd53c257780d26814f16c8b0983242f common.sh +7f6a69a77d6a4a3c34928609108b7939cd43a892d72fb14bebc1d935cd66eda3bd625d15eebb4d6026715b36b12919fcaf863ed5f65ffdc0e2de9fc1b969cb3e boot.sh +ee19dcc0b46bdff8581c2661cda69fd8a3fa2de4dd30d96a4ce438b2536043a9f0bc57a6b0d4056e2715a2663a89bc1b07ec33798d5430a2046a65069a327cda cloudbender-early.init +9ca46acc407ff6aa18beec02564c3822db215bd5dc0a94f9bd9258c9b99f85cc40f793e20618509ed7f1e8645407cffb8274d7838b46442ad44e64726e37e3a0 cloudbender.init b9479835d8667fa99f8b1b140f969f0464a9bb3c60c7d19b57e306cfe82357d453932791e446caded71fddd379161ae8328367f1ee75ae3afc1b85e12294b621 zdt-sysctl.conf 76e6a4f309f31bfa07de2d3b1faebe5670722752e18157b69d6e868cbe9e85eda393aed0728b0347a01a810eee442844c78259f86ff71e3136a013f4cbfaaea4 ps_mem.py 5376f4bf8356ce9249c45e78085073245181e8742c7b4be47c71dcd97a611ae125a7dfd3060502bdd591560af070334f89fe60dbc09c008926149c538ab0560a syslog-ng.conf 484bdcf001b71ce5feed26935db437c613c059790b99f3f5a3e788b129f3e22ba096843585309993446a88c0ab5d60fd0fa530ef3cfb6de1fd34ffc828172329 
syslog-ng.logrotate.conf e86eed7dd2f4507b04050b869927b471e8de26bc7d97e7064850478323380a0580a92de302509901ea531d6e3fa79afcbf24997ef13cd0496bb3ee719ad674ee syslog-ng.apparmor -b928ba547af080a07dc9063d44cb0f258d0e88e7c5a977e8f1cf1263c23608f0a138b8ffca0cdf5818ee72fccb3ce8433f877811be3107bb9c275dfff988179c monitrc -64944727d658ff37e7ff9d22a23869e225e104d9229756ba4fef1fc481c56f782472a1c74f8127636b4a98d4545ae30e7d35822a1f0a0fa31a59ec8eaf8c0396 monit_alert.sh.aws +e00a8f296c76446fe1241bf804c0108f47a2676f377a413ee9fede0943362a6582cad30fe13edd93f3d0daab0e2d7696553fb9458dca62adc05572dce339021a monitrc +c955dabe692c0a4a2fa2b09ab9096f6b14e83064b34ae8d22697096daf6551f00b590d837787d66ea1d0030a7cc30bef583cc4c936c980465663e73aec5fa2dc monit_alert.sh.aws 346b0170ada6cc1207ffb7b8ef138a1570a63c7df4d57618aa4b6b6c0d2df2197b0f5b23578ec83c641ee5e724865ac06985222e125809c990467426a0851b72 neofetch.conf 532b8e2eb04942ab20bdc36b5dea1c60239fcbfcb85706123f3e05c18d65c938b85e9072d964ae5793177625a8db47b532db1f5bd5ed5ecbb70d5a331666ff54 zdt-ascii.txt c3e72cd92936b03f2b9eab5e97e9a12fcddcdf2c943342e42e7702e2d2407e00859c62dc9b4de3378688d2f05458aa5c104272af7ab13e53a62f1676d1a9a1b4 profile 816049360aa442f9e9aa4d6525795913cfe3dc7c6c14dc4ccad59c0880500f9d42f198edc442fe036bc84ba2690d9c5bc8ae622341d8276b3f14947db6b879b1 route53.py 7da28446762a36a6737c5b30becbce78775bd943b4d0c5ef938a50f49b4f51f66708434aa79004c19d16c56c83f54c8d6d68e1502ebc250c73f8aae12bed83c0 get_iam_sshkeys.py -8fd5dca9b9fdae61022f136215afa8adc0d199afcf26593bdd0bd1946d0f2efc5d7ed345704ef9642fbeedeeea007ed31b67fafe89289b749a560a045da45b8e uniq_hostname.py +ae1941fc45e61fa8d211f5ef7eff2dd01510a6d364c4302cab267812321a10e7434ecc8d8c9263d8671ce5604d04d6531601bf42886a55fb6aec7f321651e1dc uniq_hostname.py " diff --git a/kubezero/zdt-base/boot.sh b/kubezero/zdt-base/boot.sh new file mode 100644 index 0000000..a2f7bd9 --- /dev/null +++ b/kubezero/zdt-base/boot.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# We have no metadata nor instance parameters yet! + +# We built on top of tiny-cloud +. /lib/tiny-cloud/common + +# archive orig /var, mount new var on top and restore orig var +copy_and_mount() { + local dev=$1 + + tar cf /tmp/var.tar /var 2>/dev/null + mount -t xfs -o noatime "$dev" /var + tar xf /tmp/var.tar -C / && rm -f /tmp/var.tar +} + +setup_var() { + local _devices="$(find /dev/xvd[a-z] /dev/sd[a-z] -maxdepth 0 2>/dev/null || true)" + + for d in $_devices; do + # resolve to a valid block device + dev="$(realpath "$d")" + [ -b "$dev" ] || continue + + # already mounted + mount | grep -q "$d" && continue + + case "$CLOUD" in + aws) + # on AWS look for sdx/xvdx + if [ "$d" = "/dev/sdx" -o "$d" = "/dev/xvdx" ]; then + # check volume for existing filesystem + type=$(file -Lbs $d) + if [[ "$type" =~ "XFS filesystem" ]]; then + xfs_repair $d >/dev/null 2>&1 + mount -t xfs -o noatime "$d" /var + else + mkfs.xfs -qf $d >/dev/null + copy_and_mount "$d" + fi + + add_once /etc/fstab "$d /var xfs defaults,noatime,nofail 0 2" + + log -i -t early info "mounted $d at /var" + fi + ;; + *) + ewarn "Unsupported cloud: $CLOUD" + return 1 + ;; + esac + done +} diff --git a/kubezero/zdt-base/cb_base.sh b/kubezero/zdt-base/cb_base.sh deleted file mode 100644 index 78c0501..0000000 --- a/kubezero/zdt-base/cb_base.sh +++ /dev/null @@ -1,150 +0,0 @@ -#!/bin/sh - -function log { logger -t "user-data.${_FUNC}" -- $@; } - -function die { log "$@"; exit_trap 1 1 / "$@"; } - -# msg used for sns event, last one wins -function msg { MSG="$@"; log "$@"; } - -# Generic retry command wrapper, incl. 
timeout of 30s -# $1 = number of tries; 0 = forever -# $2 = number of seconds to sleep between tries -# $@ actual command -retry() { - local tries=$1 - local waitfor=$2 - shift 2 - while true; do - # Only use timeout of $1 is an executable, call directly if function - type -tf $1 >/dev/null && { timeout 30 $@ && return; } || { $@ && return; } - ((tries=tries-1)) - [ $tries -eq 0 ] && return 1 - sleep $waitfor - done -} - -function add_swap() { - [ -f /.swapfile ] || { dd if=/dev/zero of=/.swapfile bs=1M count=$1 && chmod 600 /.swapfile && mkswap /.swapfile && swapon /.swapfile; } - grep -q "/.swapfile" /etc/fstab || echo "/.swapfile none swap sw 0 0" >> /etc/fstab - sysctl -w vm.swappiness=10 -} - -# Get SSM secure string base64 decoded -# $0 SSM_PATH, value to stdout -function get_secret() { - aws ssm get-parameter --name ${1,,} --with-decryption --query 'Parameter.Value' | base64 -d -} - -# Store values as base64 on SSM -# $0 SSM_PATH VALUE -function put_secret() { - aws ssm put-parameter --name ${1,,} --type SecureString --value "$(echo "$2" | base64 -w0)" --overwrite -} - -# Gets existing passphrase or creates new passphrase and stores it -function init_passphrase() { - local _URL=$1 - local _PPFILE=$2 - - # If secret already exists noop - [ -f $_PPFILE ] && return 0 - - get_secret $_URL > $_PPFILE && chmod 600 $_PPFILE || \ - { xxd -l16 -p /dev/random > $_PPFILE; chmod 600 $_PPFILE; put_secret $_URL "$(cat $_PPFILE)"; } -} - -function asg_heartbeat { - [ -n "$LAUNCH_HOOK" ] && aws autoscaling record-lifecycle-action-heartbeat --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name $AWS_AUTOSCALING_GROUPNAME || true -} - -function setup_sns_alarms() { - # store SNS message json template - cat < /etc/cloudbender/sns_alarm.json -{ - "Source": "CloudBender", - "AWSAccountId": "$AWS_ACCOUNT_ID", - "Region": "$REGION", - "Artifact": "$ARTIFACT", - "Asg": "$AWS_AUTOSCALING_GROUPNAME", - "Instance": "$INSTANCE_ID", - "ip": "$IP_ADDRESS" -} -EOF - mkdir -p /var/lib/cloudbender - cat < /var/lib/cloudbender/sns_alarm.sh -#!/bin/bash - -SUBJECT=\$1 -MSG=\$2 -LEVEL=\${3:-Info} -ATTACHMENT=\${4:-""} -EMOJI=\${5:-""} - -jq -M --arg subject "\$SUBJECT" --arg level "\$LEVEL" --arg msg "\$MSG" --arg attachment "\$ATTACHMENT" --arg emoji "\$EMOJI" --arg hostname "\$HOSTNAME" '.Subject = \$subject | .Level = \$level | .Message = \$msg | .Attachment = \$attachment | .Emoji = \$emoji | .Hostname = \$hostname' < /etc/cloudbender/sns_alarm.json | sed -e 's/\\\\\\\\/\\\\/g' > /tmp/sns.json -aws sns publish --region ${REGION} --target-arn $ALARMSNSARN --message file:///tmp/sns.json -EOF - chmod +x /var/lib/cloudbender/sns_alarm.sh -} - -function exit_trap { - set +e - trap - ERR EXIT - local ERR_CODE=$1 - local ERR_LINE="$2" - local ERR_FUNC="$3" - local ERR_CMD="$4" - - if [ $ERR_CODE -ne 0 ]; then - CFN_STATUS="FAILURE" - RESULT="ABANDON" - else - CFN_STATUS="SUCCESS" - RESULT="CONTINUE" - fi - - # Add SNS events on demand - if [ "x${ALARMSNSARN}" != 'x' ]; then - if [ $ERR_CODE -ne 0 ]; then - LEVEL="Error" - SUBJECT="Error during cloud-init." - if [ $ERR_LINE -ne 1 ]; then - MSG="$ERR_CMD failed in $ERR_FUNC at $ERR_LINE. Return: $ERR_CODE" - ATTACHMENT="$(pr -tn $0 | tail -n+$((ERR_LINE - 3)) | head -n7)" - else - MSG="$ERR_CMD" - fi - - if [ -n "$DEBUG" ]; then - SUBJECT="$SUBJECT Instance kept running for debug." - else - SUBJECT="$SUBJECT Instance terminated by ASG lifecycle hook." - fi - else - LEVEL="Info" - SUBJECT="ZDT Alpine Instance launched." 
- fi - - if [ -z "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] || [ "$LEVEL" != "Info" ]; then - /var/lib/cloudbender/sns_alarm.sh "$SUBJECT" "$MSG" "$LEVEL" "$ATTACHMENT" - fi - - # Disable scaling events during shutdown - [ -n "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] && echo "DISABLE_SCALING_EVENTS=1" >> /etc/cloudbender/rc.conf - fi - - [ -n "$LAUNCH_HOOK" ] && aws autoscaling complete-lifecycle-action --lifecycle-action-result $RESULT --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name ${AWS_AUTOSCALING_GROUPNAME} || true - - if [ -n "${AWS_CLOUDFORMATION_LOGICAL_ID}" ]; then - aws cloudformation signal-resource --stack-name ${AWS_CLOUDFORMATION_STACK_NAME} --logical-resource-id ${AWS_CLOUDFORMATION_LOGICAL_ID} --unique-id ${INSTANCE_ID} --status ${CFN_STATUS} - fi - - # timestamp being done - end_uptime=$(awk '{print $1}' < /proc/uptime) - log "Exiting user-data. Duration: $(echo "$end_uptime-$start_uptime" | bc) seconds" - - # Shutdown / poweroff if we ran into error and not DEBUG - [ $ERR_CODE -ne 0 -a -z "$DEBUG" ] && poweroff - - exit 0 -} diff --git a/kubezero/zdt-base/cb_init.sh b/kubezero/zdt-base/cb_init.sh deleted file mode 100644 index 08ad0f1..0000000 --- a/kubezero/zdt-base/cb_init.sh +++ /dev/null @@ -1,108 +0,0 @@ -# We built on top of tiny-cloud -. /etc/tiny-cloud.conf - -IMDS_ENDPOINT="169.254.169.254" -. /lib/tiny-cloud/cloud/"$CLOUD"/imds - -_imds() { - wget --quiet --timeout 1 --output-document - \ - --header "$(_imds_header)" \ - "http://$IMDS_ENDPOINT/$IMDS_URI/$1$IMDS_QUERY" -} - -function query_imds() { - MAC=$(_imds meta-data/mac) - AVAILABILITY_ZONE=$(_imds meta-data/placement/availability-zone) - REGION=$(echo ${AVAILABILITY_ZONE} | sed "s/[a-z]$//") - INSTANCE_ID=$(_imds meta-data/instance-id) - - cat <> /var/lib/cloud/meta-data -AVAILABILITY_ZONE=$AVAILABILITY_ZONE -REGION=$REGION -INSTANCE_ID=$INSTANCE_ID - -# Get the internal IP of first interface -IP_ADDRESS=$(_imds meta-data/local-ipv4) -PUBLIC_IP_ADDRESS=$(_imds meta-data/public-ipv4 || true) - -MAC=$MAC -VPC_CIDR_RANGE=$(_imds meta-data/network/interfaces/macs/${MAC}/vpc-ipv4-cidr-block) -SUBNET=$(_imds meta-data/network/interfaces/macs/${MAC}/subnet-ipv4-cidr-block) - -# Make sure we have basic domain / hostname set at all time -_META_HOSTNAME=$(_imds meta-data/hostname) -DOMAIN_NAME=\${_META_HOSTNAME#*.} -HOSTNAME=\${_META_HOSTNAME%%.*} - -AWS_ACCOUNT_ID=$(_imds meta-data/network/interfaces/macs/${MAC}/owner-id) -INSTANCE_LIFE_CYCLE=$(_imds meta-data/instance-life-cycle) - -INSTANCE_TYPE=$(_imds meta-data/instance-type) -EOF -} - -function get_tags() { - # via metadata AWS restricts tags to NOT have " " or "/" ;-( - #for key in $(_imds meta-data/tags/instance); do - # TAGS[$key]=$(_imds meta-data/tags/instance/"$key") - #done - # Replace all /:.- with _ for valid variable names - while read _key value; do - key=$(echo ${_key//[\/:.-]/_} | tr '[:lower:]' '[:upper:]') - echo "$key=\"$value\"" >> /var/lib/cloud/meta-data - done < <(aws ec2 describe-tags --filters "Name=resource-id,Values=${INSTANCE_ID}" --query 'Tags[*].[Key,Value]') -} - -# extract user-data args and cloud meta-data into /var/lib/cloud/meta-data -get_meta_data() { - if [ ! 
-f /var/lib/cloud/meta-data ]; then - ebegin "collecting metadata, instance tags and parameters" - - echo '#!/bin/bash' > /var/lib/cloud/meta-data - - query_imds - - export AWS_DEFAULT_REGION=$REGION - export AWS_DEFAULT_OUTPUT=text - - get_tags - - [ -f /var/lib/cloud/user-data ] && bash /var/lib/cloud/user-data extract_parameters - fi - - . /var/lib/cloud/meta-data - - # Workaround for current CFN ASG_ hack - _key=$(echo $AWS_CLOUDFORMATION_LOGICAL_ID | tr '[:lower:]' '[:upper:]') - [ -n "$(eval echo \$${_key}_CUSTOMHOSTNAME)" ] && CUSTOMHOSTNAME="$(eval echo \$${_key}_CUSTOMHOSTNAME)" - [ -n "$(eval echo \$${_key}_VOLUMES)" ] && VOLUMES="$(eval echo \$${_key}_VOLUMES)" - - return 0 -} - - -################ -# IAM SSH KEYS # -################ -cb_sshkeys() { - case "$CLOUD" in - aws) - # on AWS call IAM for allowed groups and actual keys - GROUP=${SSHKEYIAMGROUP:-""} - ROLE=${SSHKEYIAMROLE:-"arn:aws:iam::000000000000:role/Undefined"} - [ $ROLE == "arn:aws:iam::000000000000:role/Undefined" ] && ROLE="" - - if [ -n "$GROUP" ]; then - # Configure SSHD - sed -i -e "s,^[\s#]*AuthorizedKeysCommand\s.*,AuthorizedKeysCommand /usr/sbin/get_iam_sshkeys.py --user %u --group $GROUP --iamRole \"$ROLE\"," /etc/ssh/sshd_config - sed -i -e "s,^[\s#]*AuthorizedKeysCommandUser\s.*,AuthorizedKeysCommandUser nobody," /etc/ssh/sshd_config - - ebegin "added $GROUP to SSH admin keys" - fi - ;; - *) - ewarn "Unsupported Cloud: $CLOUD" - return 1 - ;; - esac -} diff --git a/kubezero/zdt-base/cb_lock.sh b/kubezero/zdt-base/cb_lock.sh deleted file mode 100644 index f8e0dea..0000000 --- a/kubezero/zdt-base/cb_lock.sh +++ /dev/null @@ -1,67 +0,0 @@ -MUTEX=mutex -MUTEX_OWNER=$HOSTNAME -MUTEX_TIMEOUT=600 - -release_lock() { - local S3LOCK=$1 - - rm -f $MUTEX - aws s3 rm $S3LOCK -} - -# Lock not timed out and we own it: 0 -# Lock not timed out and someone else owns it: 1 -# Lock timed out: 2 -verify_lock() { - local S3LOCK=$1 - - aws s3 cp $S3LOCK $MUTEX - _host=$(grep "MUTEX_OWNER=" $MUTEX | sed -e 's/MUTEX_OWNER=//') - _time=$(grep "MUTEX_TIME=" $MUTEX | sed -e 's/MUTEX_TIME=//') - - # Check for timestamp and timeout - let timepassed=$(date -u +%s)-$_time - [ $timepassed -gt $MUTEX_TIMEOUT ] && return 2 - - [ "$_host" == "$MUTEX_OWNER" ] && return 0 - return 1 -} - -aquire_lock() { - local S3LOCK=$1 - - echo "MUTEX_OWNER=${MUTEX_OWNER}" > $MUTEX - echo "MUTEX_TIME=$(date -u +%s)" >> $MUTEX - aws s3 cp $MUTEX $S3LOCK - - # verify we actually got the lock - sleep 2 - verify_lock $S3LOCK -} - -check_lock() { - local S3LOCK=$1 - - aws s3 ls $S3LOCK && rc=$? || rc=$? - - # No LOCK ? -> get it ! - if [ $rc -ne 0 ]; then - aquire_lock $S3LOCK - - else - verify_lock $S3LOCK && rc=$? || rc=$? - - # Lock timeout -> we get it - if [ $rc -eq 2 ]; then - aquire_lock $S3LOCK - - # we already own it - elif [ $rc -eq 0 ]; then - return 0 - - # someone else has a valid lock - else - return 1 - fi - fi -} diff --git a/kubezero/zdt-base/cb_volumes.sh b/kubezero/zdt-base/cb_volumes.sh deleted file mode 100644 index 3e412e3..0000000 --- a/kubezero/zdt-base/cb_volumes.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/sh - -# We built on top of tiny-cloud -. 
/etc/tiny-cloud.conf - -# archive orig /var, mount new var on top and restore orig var -copy_and_mount() { - local dev=$1 - - tar cf /tmp/var.tar /var 2>/dev/null - mount -t xfs -o noatime "$dev" /var - tar xf /tmp/var.tar -C / && rm -f /tmp/var.tar -} - -setup_var() { - for d in $(find /dev/xvd[a-z] /dev/sd[a-z]); do - # resolve to a valid block device - dev="$(realpath "$d")" - [ -b "$dev" ] || continue - - # already mounted - mount | grep -q "$d" && continue - - case "$CLOUD" in - aws) - # on AWS look for sdx/xvdx - if [ "$d" = "/dev/sdx" -o "$d" = "/dev/xvdx" ]; then - # check volume for existing filesystem - type=$(file -Lbs $d) - if [[ "$type" =~ "XFS filesystem" ]]; then - xfs_repair $d >/dev/null 2>&1 - mount -t xfs -o noatime "$d" /var - else - mkfs.xfs -qf $d >/dev/null - copy_and_mount "$d" - fi - grep -q "$d" /etc/fstab || echo "$d /var xfs defaults,noatime,nofail 0 2" >> /etc/fstab - fi - ;; - *) - ewarn "Unsupported cloud: $CLOUD" - return 1 - ;; - esac - done -} - -attach_ebs() { - local volId="$1" - local device="$2" - - local tries=30 - while true; do - _json="$(aws ec2 describe-volumes --volume-ids $volId --region $REGION --output json)" - rc=$?; [ $rc -ne 0 ] && return $rc - - vol_status=$(echo "$_json" | jq -r .Volumes[].State) - attachId=$(echo "$_json" | jq -r .Volumes[].Attachments[].InstanceId) - - [ "$attachId" = "$INSTANCE_ID" ] && break - - if [ "$vol_status" = "available" ]; then - aws ec2 attach-volume --volume-id "$volId" --instance-id "$INSTANCE_ID" --region "$REGION" --device "$device" > /dev/null - rc=$?; [ $rc -ne 0 ] && return $rc - break - fi - - # if attached but not to us -> detach - if [ "$vol_status" = "in-use" ]; then - aws ec2 detach-volume --volume-id "$volId" --region "$REGION" --force - rc=$?; [ $rc -ne 0 ] && return $rc - fi - - ((tries=tries-1)) - [ $tries -eq 0 ] && return 1 - sleep 5 - done -} - -_parse_volume() { - # Todo: proper checks once all is yaml - # For now just replace ':' - echo $1 | sed -e 's/:/ /g' -} - -# mount optional remote volumes -mount_volumes() { - local volumes="$1" - - for vol in $volumes; do - # Todo: check volume type and call matching func - read volType volId volDevice volPath < <(_parse_volume $vol) - - [ "$volType" != "ebs" ] && { echo "Unknown volume type $volType"; break; } - attach_ebs $volId $volDevice - rc=$? - [ $rc -ne 0 ] && { ewarn "error trying to attach $volId"; break; } - - # wait for the block device to become available - while true; do - mdev -s - test -b $volDevice && break - sleep 1 - done - - # check volume for existing filesystem - type=$(file -Lbs $volDevice) - if [[ "$type" =~ "XFS filesystem" ]]; then - xfs_repair $volDevice >/dev/null 2>&1 - else - mkfs.xfs -qf $volDevice >/dev/null - fi - - # mount - mkdir -p $volPath - mount -t xfs -o noatime $volDevice $volPath - - ebegin "mounting $volDevice at $volPath" - done -} - -unmount_volumes() { - local volumes="$1" - - for vol in $volumes; do - read volType volId volDevice volPath < <(_parse_volume $vol) - - umount $volPath && aws ec2 detach-volume --volume-id "$volId" --instance-id $INSTANCE_ID --region $REGION > /dev/null - done -} diff --git a/kubezero/zdt-base/cloudbender-early.init b/kubezero/zdt-base/cloudbender-early.init index 49f9bf5..bd73521 100755 --- a/kubezero/zdt-base/cloudbender-early.init +++ b/kubezero/zdt-base/cloudbender-early.init @@ -1,20 +1,20 @@ #!/sbin/openrc-run # vim:set ts=8 noet ft=sh: -description="CloudBender early - eg. 
mount suitable block device at /var" +description="CloudBender early tasks (no network / metadata available yet)" depend() { - need fsck root - use lvm modules - after clock lvm modules - before bootmisc + need fsck root + use lvm modules + after clock lvm modules + before bootmisc } start() { - source /usr/lib/cloudbender/volumes.sh + source /usr/lib/cloudbender/boot.sh - ebegin "Looking for suitable /var" - setup_var - eend $? + ebegin "CloudBender - early phase" + setup_var + eend $? } diff --git a/kubezero/zdt-base/cloudbender.init b/kubezero/zdt-base/cloudbender.init index 23882c4..faca0c4 100755 --- a/kubezero/zdt-base/cloudbender.init +++ b/kubezero/zdt-base/cloudbender.init @@ -2,47 +2,49 @@ # vim:set ts=8 noet ft=sh: # -description="CloudBender - mount additional volumes, send shutdown messages" +description="CloudBender - setup meta_data, mount additional volumes, send shutdown messages" depend() { - need net - before sshd - after tiny-cloud-main + need net + before sshd + after tiny-cloud-main } start() { - source /usr/lib/cloudbender/init.sh - source /usr/lib/cloudbender/base.sh - source /usr/lib/cloudbender/volumes.sh + source /usr/lib/cloudbender/common.sh - get_meta_data + get_meta_data + import_meta_data - # mount extra volumes as early as possible - [ -n "$VOLUMES" ] && mount_volumes "$VOLUMES" + # various initial OS tweaks + setup_instance - # allow optional ssh keys, eg. via IAM for AWS - cb_sshkeys + # mount extra optional volumes + mount_volumes "$VOLUMES" - eend $? + # add optional ssh keys, eg. via IAM for AWS + configure_sshd + + set_hostname $CUSTOMHOSTNAME + + # if fixed hostname use persistent sshd keys + [ -n "$CUSTOMHOSTNAME" ] && persistent_sshd_hostkeys "/_ssh/${ARTIFACT}/${CONGLOMERATE}/${HOSTNAME}" + + eend $? } stop() { - source /usr/lib/cloudbender/init.sh - source /usr/lib/cloudbender/base.sh - source /usr/lib/cloudbender/volumes.sh + source /usr/lib/cloudbender/common.sh - get_meta_data + import_meta_data - [ -n "$VOLUMES" ] && unmount_volumes "$VOLUMES" + unmount_volumes "$VOLUMES" - # Include dynamic config setting create at boot - [ -r /etc/cloudbender/rc.conf ] && . /etc/cloudbender/rc.conf + [ -n "$DEBUG" ] && [ -r /tmp/shutdown.log ] && SHUTDOWNLOG="$(cat /tmp/shutdown.log)" - [ -n "$DEBUG" ] && [ -r /tmp/shutdown.log ] && SHUTDOWNLOG="$(cat /tmp/shutdown.log)" + [ -n "$RC_REBOOT" ] && ACTION="rebooting" || ACTION="terminated" + [ -z "$DISABLE_SCALING_EVENTS" ] && /var/lib/cloud/sns_alarm.sh "Instance $ACTION" "" Info "$SHUTDOWNLOG" - [ -n "$RC_REBOOT" ] && ACTION="rebooting" || ACTION="terminated" - [ -z "$DISABLE_SCALING_EVENTS" ] && /var/lib/cloudbender/sns_alarm.sh "Instance $ACTION" "" Info "$SHUTDOWNLOG" - - eend $? + eend $? } diff --git a/kubezero/zdt-base/common.sh b/kubezero/zdt-base/common.sh new file mode 100644 index 0000000..5cad24c --- /dev/null +++ b/kubezero/zdt-base/common.sh @@ -0,0 +1,675 @@ +# We built on top of tiny-cloud +. /lib/tiny-cloud/common + +IMDS_ENDPOINT="169.254.169.254" +. 
/lib/tiny-cloud/cloud/"$CLOUD"/imds + +_imds() { + wget --quiet --timeout 1 --output-document - \ + --header "$(_imds_header)" \ + "http://$IMDS_ENDPOINT/$IMDS_URI/$1$IMDS_QUERY" +} + +# Todo: This should go into a yaml file +query_imds() { + MAC=$(_imds meta-data/mac) + AVAILABILITY_ZONE=$(_imds meta-data/placement/availability-zone) + REGION=$(echo ${AVAILABILITY_ZONE} | sed "s/[a-z]$//") + INSTANCE_ID=$(_imds meta-data/instance-id) + + cat <<EOF >> /var/lib/cloud/meta-data +AVAILABILITY_ZONE=$AVAILABILITY_ZONE +REGION=$REGION +INSTANCE_ID=$INSTANCE_ID +IP_ADDRESS=$(_imds meta-data/local-ipv4) +PUBLIC_IP_ADDRESS=$(_imds meta-data/public-ipv4 || true) +DEFAULT_GW_INTERFACE=$(ip -o route get 8.8.8.8 | awk '{print $5}') +MAC=$MAC +VPC_CIDR_RANGE=$(_imds meta-data/network/interfaces/macs/${MAC}/vpc-ipv4-cidr-block) +SUBNET=$(_imds meta-data/network/interfaces/macs/${MAC}/subnet-ipv4-cidr-block) +_META_HOSTNAME=$(_imds meta-data/hostname) +DOMAIN_NAME=\${_META_HOSTNAME#*.} +AWS_ACCOUNT_ID=$(_imds meta-data/network/interfaces/macs/${MAC}/owner-id) +INSTANCE_LIFE_CYCLE=$(_imds meta-data/instance-life-cycle) +INSTANCE_TYPE=$(_imds meta-data/instance-type) +EOF +} + +# Todo: This should go into a yaml file +get_tags() { + # via metadata AWS restricts tags to NOT have " " or "/" ;-( + # Replace all /:.- with _ for valid variable names + for key in $(_imds meta-data/tags/instance); do + value="$(_imds meta-data/tags/instance/$key)" + key=$(echo ${key//[\/:.-]/_} | tr '[:lower:]' '[:upper:]') + echo "$key=\"$value\"" >> /var/lib/cloud/meta-data + done + #while read _key value; do + # key=$(echo ${_key//[\/:.-]/_} | tr '[:lower:]' '[:upper:]') + # echo "$key=\"$value\"" >> /var/lib/cloud/meta-data + #done < <(aws ec2 describe-tags --filters "Name=resource-id,Values=${INSTANCE_ID}" --query 'Tags[*].[Key,Value]' --region $REGION --output text) +} + +# extract user-data args and cloud meta-data into /var/lib/cloud/meta-data +get_meta_data() { + if [ ! -f /var/lib/cloud/meta-data ]; then + echo '#!/bin/bash' > /var/lib/cloud/meta-data + + query_imds + get_tags + fi + + if [ ! -f /etc/cloudbender.conf ]; then + bash /var/lib/cloud/user-data extract_parameters + fi +} + +import_meta_data() { + . /etc/cloudbender.conf + . /var/lib/cloud/meta-data + + export AWS_DEFAULT_REGION=$REGION + export AWS_DEFAULT_OUTPUT=text + + # some basic logic + if [ "$DEBUG" == "None" -o "$DEBUG" == "False" ]; then + unset DEBUG + + LAUNCH_HOOK="CloudBenderLaunchHook" + fi + + # Workaround for current CFN ASG_ hack + _key=$(echo $AWS_CLOUDFORMATION_LOGICAL_ID | tr '[:lower:]' '[:upper:]') + [ -n "$(eval echo \$${_key}_CUSTOMHOSTNAME)" ] && CUSTOMHOSTNAME="$(eval echo \$${_key}_CUSTOMHOSTNAME)" + [ -n "$(eval echo \$${_key}_VOLUMES)" ] && VOLUMES="$(eval echo \$${_key}_VOLUMES)" + + return 0 +} + +# setup_instance, various OS tweaks impossible to do via AMI baking +setup_instance() { + # create machine-id to emulate systemd + [ -f /etc/machine-id ] || uuidgen > /etc/machine-id + + # add and mount bpf file system + add_once /etc/fstab "bpffs /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0" + mount -a + + # Ensure certain mounts are shared to run containers later, eg.
cilium, falco + mount --make-shared /sys/fs/cgroup + mount --make-shared /sys/fs/bpf + mount --make-shared /sys + + add_once /etc/hosts "${IP_ADDRESS} ${_META_HOSTNAME} ${HOSTNAME}" + + # Set system wide default region for boto3 + echo "export AWS_DEFAULT_REGION=$REGION" > /etc/profile.d/aws.sh + + case "$CLOUD" in + aws) + setup_sns_alarms + ;; + *) + ewarn "Unsupported Cloud: $CLOUD" + # return 1 + ;; + esac +} + +################ +# IAM SSH KEYS # +################ +configure_sshd() { + # Change Listen port + local port=${SSHPORT:-"22"} + [ -w /etc/ssh/sshd_config ] && sed -i -e 's/^[\s#]*Port\s.*/Port '$port'/' /etc/ssh/sshd_config + + case "$CLOUD" in + aws) + # on AWS call IAM for allowed groups and actual keys + local group=${SSHKEYIAMGROUP:-""} + local role=${SSHKEYIAMROLE:-"arn:aws:iam::000000000000:role/Undefined"} + [ $role == "arn:aws:iam::000000000000:role/Undefined" ] && role="" + + if [ -n "$group" ]; then + # Configure SSHD + sed -i -e 's,^[\s#]*AuthorizedKeysCommand\s.*,AuthorizedKeysCommand /usr/sbin/get_iam_sshkeys.py --user %u --group '$group' --iamRole "'$role'",' /etc/ssh/sshd_config + sed -i -e 's,^[\s#]*AuthorizedKeysCommandUser\s.*,AuthorizedKeysCommandUser nobody,' /etc/ssh/sshd_config + + ebegin "added $group to SSH admin keys" + fi + ;; + *) + ewarn "Unsupported Cloud: $CLOUD" + # return 1 + ;; + esac +} + + +# Persist host keys +persistent_sshd_hostkeys() { + # Top level is artifact to be able to limit the SSM IAM permissions + local ssm_path=$1 + local key_types="dsa ecdsa ed25519 rsa" + + # If host keys exist on SSM try to download + RET=0 + for key in $key_types; do + (aws ssm get-parameters --names "${ssm_path}/host_${key}.tgz" --with-decryption --query 'Parameters[0].Value' | base64 -d | tar xzf - --directory=/ 1>/dev/null 2>&1) \ + && log -t user-data info "Restored ssh_host_${key}_key from SSM" || RET=1 + done + + # Update keys if any key couldn't be restored from SSM + if [ $RET -eq 1 ]; then + for key in $key_types; do + if [ -r /etc/ssh/ssh_host_${key}_key -a -r /etc/ssh/ssh_host_${key}_key.pub ]; then + (aws ssm put-parameter --name "${ssm_path}/host_${key}.tgz" --type SecureString --value \ + "$(tar czf - /etc/ssh/ssh_host_${key}_key /etc/ssh/ssh_host_${key}_key.pub | base64)" --overwrite) \ + && log -t user-data info "Uploaded ssh_host_${key}_key to SSM" + fi + done + fi +} + +# either plain custom hostname or +# - `unique:` eg. `uniq:kube-worker-{:02}` -> kube-worker-01 +# - `myownip: ` eg. `myip: nodegroup-` -> nodegroup-1.2.3.4 +set_hostname() { + local custom_hostname=$(echo $1 | awk -F. '{ print $1 }') + + if [ -n "$custom_hostname" ]; then + if [[ "$custom_hostname" == unique:* ]]; then + new_hostname=$(uniq_hostname.py $AWS_EC2LAUNCHTEMPLATE_ID $INSTANCE_ID ${custom_hostname##unique:}) + + elif [[ "$custom_hostname" == myownip:* ]]; then + local _ip=$(echo $IP_ADDRESS | sed -e 's/\./-/g') + new_hostname=$(echo "${custom_hostname##myownip:}$_ip") + + else + new_hostname=$custom_hostname + fi + + FQDN="${new_hostname}.${DOMAIN_NAME}" + echo ${new_hostname} > /etc/hostname + + hostname $new_hostname + export HOSTNAME=$new_hostname + + # add new hostname to hosts + add_once /etc/hosts "${IP_ADDRESS} ${FQDN} ${new_hostname}" + + log -t user-data info "Hostname updated to ${new_hostname}." 
+ + # hup syslog to update loghost macro + /etc/init.d/syslog-ng reload + + # update Route53 entry for VPC internal FQDN + route53.py --fqdn $FQDN --record $IP_ADDRESS + + # update our Name Tag to FQDN or PrivateDNSName to allow easy indentification in the AWS UI + aws ec2 create-tags --resources $INSTANCE_ID --tags Key=Name,Value=$FQDN + else + aws ec2 create-tags --resources $INSTANCE_ID --tags Key=Name,Value=${HOSTNAME}.${REGION}.compute.internal + fi +} + +# various early volume functions +attach_ebs() { + local volId="$1" + local device="$2" + + local tries=30 + while true; do + _json="$(aws ec2 describe-volumes --volume-ids $volId --region $REGION --output json)" + rc=$?; [ $rc -ne 0 ] && return $rc + + vol_status=$(echo "$_json" | jq -r .Volumes[].State) + attachedId=$(echo "$_json" | jq -r .Volumes[].Attachments[].InstanceId) + + [ "$attachedId" = "$INSTANCE_ID" ] && break + + if [ "$vol_status" = "available" ]; then + aws ec2 attach-volume --volume-id "$volId" --instance-id "$INSTANCE_ID" --region "$REGION" --device "$device" > /dev/null + rc=$?; [ $rc -ne 0 ] && return $rc + break + fi + + # if attached but not to us -> detach + if [ "$vol_status" = "in-use" ]; then + aws ec2 detach-volume --volume-id "$volId" --region "$REGION" --force + rc=$?; [ $rc -ne 0 ] && return $rc + fi + + ((tries=tries-1)) + [ $tries -eq 0 ] && return 1 + sleep 5 + done +} + +_parse_volume() { + # Todo: proper checks once all is yaml + # For now just replace ':' + echo $1 | sed -e 's/:/ /g' +} + +# mount optional remote volumes +mount_volumes() { + local volumes="$1" + + for vol in $volumes; do + # Todo: check volume type and call matching func + read volType volId volDevice volPath < <(_parse_volume $vol) + + [ "$volType" != "ebs" ] && { echo "Unknown volume type $volType"; break; } + attach_ebs $volId $volDevice + rc=$? + [ $rc -ne 0 ] && { ewarn "error trying to attach $volId"; break; } + + # wait for the block device to become available + while true; do + mdev -s + test -b $volDevice && break + sleep 1 + done + + # check volume for existing filesystem + type=$(file -Lbs $volDevice) + if [[ "$type" =~ "XFS filesystem" ]]; then + xfs_repair $volDevice >/dev/null 2>&1 + else + mkfs.xfs -qf $volDevice >/dev/null + fi + + # mount + mkdir -p $volPath + mount -t xfs -o noatime $volDevice $volPath + + ebegin "mounting $volDevice at $volPath" + done +} + +unmount_volumes() { + local volumes="$1" + + for vol in $volumes; do + read volType volId volDevice volPath < <(_parse_volume $vol) + + umount $volPath && aws ec2 detach-volume --volume-id "$volId" --instance-id $INSTANCE_ID --region $REGION > /dev/null + done +} + +# msg used for sns event, last one wins +msg() { MSG="$@"; log -t user-data info "$@"; } + +# Generic retry command wrapper, incl. 
timeout of 30s +# $1 = number of tries; 0 = forever +# $2 = number of seconds to sleep between tries +# $@ actual command +retry() { + local tries=$1 + local waitfor=$2 + shift 2 + while true; do + # Only use timeout if $1 is an executable, call directly if function + type -tf $1 >/dev/null && { timeout 30 $@ && return; } || { $@ && return; } + ((tries=tries-1)) + [ $tries -eq 0 ] && return 1 + sleep $waitfor + done +} + +add_swap() { + [ -f /.swapfile ] || { dd if=/dev/zero of=/.swapfile bs=1M count=$1 && chmod 600 /.swapfile && mkswap /.swapfile && swapon /.swapfile; } + grep -q "/.swapfile" /etc/fstab || echo "/.swapfile none swap sw 0 0" >> /etc/fstab + sysctl -w vm.swappiness=10 +} + +# Get SSM secure string base64 decoded +# $0 SSM_PATH, value to stdout +get_secret() { + aws ssm get-parameter --name ${1,,} --with-decryption --query 'Parameter.Value' | base64 -d +} + +# Store values as base64 on SSM +# $0 SSM_PATH VALUE +put_secret() { + aws ssm put-parameter --name ${1,,} --type SecureString --value "$(echo "$2" | base64 -w0)" --overwrite +} + +# Gets existing passphrase or creates new passphrase and stores it +init_passphrase() { + local _URL=$1 + local _PPFILE=$2 + + # If secret already exists noop + [ -f $_PPFILE ] && return 0 + + get_secret $_URL > $_PPFILE && chmod 600 $_PPFILE || \ + { xxd -l16 -p /dev/random > $_PPFILE; chmod 600 $_PPFILE; put_secret $_URL "$(cat $_PPFILE)"; } +} + +asg_heartbeat() { + [ -n "$LAUNCH_HOOK" ] && aws autoscaling record-lifecycle-action-heartbeat --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name $AWS_AUTOSCALING_GROUPNAME || true +} + +setup_sns_alarms() { + # store SNS message json template + cat <<EOF > /var/lib/cloud/sns_alarm.json +{ + "Source": "CloudBender", + "AWSAccountId": "$AWS_ACCOUNT_ID", + "Region": "$REGION", + "Artifact": "$ARTIFACT", + "Asg": "$AWS_AUTOSCALING_GROUPNAME", + "Instance": "$INSTANCE_ID", + "ip": "$IP_ADDRESS" +} +EOF + + cat <<'EOF' > /var/lib/cloud/sns_alarm.sh +#!/bin/bash + +SUBJECT=$1 +MSG=$2 +LEVEL=${3:-Info} +ATTACHMENT=${4:-""} +EMOJI=${5:-""} +EOF + if [ -n "$ALARMSNSARN" ]; then + cat <<EOF >> /var/lib/cloud/sns_alarm.sh +jq -M --arg subject "\$SUBJECT" --arg level "\$LEVEL" --arg msg "\$MSG" --arg attachment "\$ATTACHMENT" --arg emoji "\$EMOJI" --arg hostname "\$HOSTNAME" '.Subject = \$subject | .Level = \$level | .Message = \$msg | .Attachment = \$attachment | .Emoji = \$emoji | .Hostname = \$hostname' < /var/lib/cloud/sns_alarm.json | sed -e 's/\\\\\\\\/\\\\/g' > /tmp/sns.json +aws sns publish --region ${REGION} --target-arn $ALARMSNSARN --message file:///tmp/sns.json +EOF + fi + + chmod +x /var/lib/cloud/sns_alarm.sh +} + +exit_trap() { + set +e + trap - ERR EXIT + local ERR_CODE=$1 + local ERR_LINE="$2" + local ERR_FUNC="$3" + local ERR_CMD="$4" + + if [ $ERR_CODE -ne 0 ]; then + CFN_STATUS="FAILURE" + RESULT="ABANDON" + else + CFN_STATUS="SUCCESS" + RESULT="CONTINUE" + fi + + # Add SNS events on demand + if [ -n "$ALARMSNSARN" ]; then + if [ $ERR_CODE -ne 0 ]; then + LEVEL="Error" + SUBJECT="Error during cloud-init." + if [ $ERR_LINE -ne 1 ]; then + MSG="$ERR_CMD failed in $ERR_FUNC at $ERR_LINE. Return: $ERR_CODE" + ATTACHMENT="$(pr -tn $0 | tail -n+$((ERR_LINE - 3)) | head -n7)" + else + MSG="$ERR_CMD" + fi + + if [ -n "$DEBUG" ]; then + SUBJECT="$SUBJECT Instance kept running for debug." + else + SUBJECT="$SUBJECT Instance terminated by ASG lifecycle hook." + fi + else + LEVEL="Info" + SUBJECT="ZDT Alpine Instance launched."
+ fi + + if [ -z "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] || [ "$LEVEL" != "Info" ]; then + /var/lib/cloud/sns_alarm.sh "$SUBJECT" "$MSG" "$LEVEL" "$ATTACHMENT" + fi + + # Disable scaling events during shutdown + [ -n "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] && echo "DISABLE_SCALING_EVENTS=1" >> /etc/cloudbender.conf + fi + + [ -n "$LAUNCH_HOOK" ] && aws autoscaling complete-lifecycle-action --lifecycle-action-result $RESULT --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name ${AWS_AUTOSCALING_GROUPNAME} || true + + if [ -n "${AWS_CLOUDFORMATION_LOGICAL_ID}" ]; then + aws cloudformation signal-resource --stack-name ${AWS_CLOUDFORMATION_STACK_NAME} --logical-resource-id ${AWS_CLOUDFORMATION_LOGICAL_ID} --unique-id ${INSTANCE_ID} --status ${CFN_STATUS} + fi + + # timestamp being done + end_uptime=$(awk '{print $1}' < /proc/uptime) + log -t user-data info "Exiting user-data. Duration: $(echo "$end_uptime-$start_uptime" | bc) seconds" + + # Shutdown / poweroff if we ran into error and not DEBUG + [ $ERR_CODE -ne 0 -a -z "$DEBUG" ] && poweroff + + exit 0 +} + +### S3 based locking +MUTEX=mutex +MUTEX_OWNER=$HOSTNAME +MUTEX_TIMEOUT=600 + +release_lock() { + local S3LOCK=$1 + + rm -f $MUTEX + aws s3 rm $S3LOCK +} + +# Lock not timed out and we own it: 0 +# Lock not timed out and someone else owns it: 1 +# Lock timed out: 2 +verify_lock() { + local S3LOCK=$1 + + aws s3 cp $S3LOCK $MUTEX + _host=$(grep "MUTEX_OWNER=" $MUTEX | sed -e 's/MUTEX_OWNER=//') + _time=$(grep "MUTEX_TIME=" $MUTEX | sed -e 's/MUTEX_TIME=//') + + # Check for timestamp and timeout + let timepassed=$(date -u +%s)-$_time + [ $timepassed -gt $MUTEX_TIMEOUT ] && return 2 + + [ "$_host" == "$MUTEX_OWNER" ] && return 0 + return 1 +} + +aquire_lock() { + local S3LOCK=$1 + + echo "MUTEX_OWNER=${MUTEX_OWNER}" > $MUTEX + echo "MUTEX_TIME=$(date -u +%s)" >> $MUTEX + aws s3 cp $MUTEX $S3LOCK + + # verify we actually got the lock + sleep 2 + verify_lock $S3LOCK +} + +check_lock() { + local S3LOCK=$1 + + aws s3 ls $S3LOCK && rc=$? || rc=$? + + # No LOCK ? -> get it ! + if [ $rc -ne 0 ]; then + aquire_lock $S3LOCK + + else + verify_lock $S3LOCK && rc=$? || rc=$? 
+ + # Lock timeout -> we get it + if [ $rc -eq 2 ]; then + aquire_lock $S3LOCK + + # we already own it + elif [ $rc -eq 0 ]; then + return 0 + + # someone else has a valid lock + else + return 1 + fi + fi +} + +# All things networking +enable_ip_forwarding() { + local interface=$1 + + modprobe nf_conntrack + + cat <<EOF > /etc/sysctl.d/40-ip-forward.conf +net.ipv4.ip_forward=1 +net.ipv4.ip_local_port_range = 1024 65535 +net.ipv4.conf.$interface.send_redirects=0 +net.ipv4.conf.all.rp_filter = 1 +net.ipv4.conf.all.accept_redirects = 0 +net.ipv6.conf.default.forwarding = 1 +net.ipv6.conf.all.forwarding = 1 +net.netfilter.nf_conntrack_max = 524288 +EOF + sysctl -p /etc/sysctl.d/40-ip-forward.conf +} + +enable_iptables() { + rc-update add iptables + /etc/init.d/iptables save +} + + +register_service_dns() { + if [ -n "$DNSZONE" -a -n "$SERVICENAME" ]; then + _IP=${PUBLIC_IP_ADDRESS:-$IP_ADDRESS} + [ -n "$SERVICEPRIVATE" ] && _IP=$IP_ADDRESS + route53.py --fqdn "${SERVICENAME}.${DNSZONE}" --record $_IP + + cat <<EOF >> /etc/local.d/route53.stop +echo "Deleting Route53 record for ${SERVICENAME}.${DNSZONE}" >> /tmp/shutdown.log +route53.py --delete --fqdn "${SERVICENAME}.${DNSZONE}" --record ${PUBLIC_IP_ADDRESS:-$IP_ADDRESS} +EOF + chmod +x /etc/local.d/route53.stop + + # Short cut our public IP to private one to allow talking to our own service name + add_once /etc/hosts "${IP_ADDRESS} ${SERVICENAME}.${DNSZONE}" + fi +} + + +# associate EIP +# return 0 if we attached the EIP +# return 1 if the public IP did NOT change +associate_eip() { + local instance_id=$1 + local eip=$(echo $2 | sed -e 's/\/32//' | grep -E -o "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)") || true + local current_instance + + if [ -n "$eip" ]; then + if [ "$eip" != "0.0.0.0" ]; then + read eip_alloc_id eip_assoc_id current_instance < <(aws ec2 describe-addresses --public-ips $eip --query 'Addresses[*].[AllocationId,AssociationId,InstanceId]' || true) + + # If we already own and have the EIP attached -> done + [ "$instance_id" == "$current_instance" ] && return + + if [ ! -z "$eip_alloc_id" ]; then + if [[ "$eip_assoc_id" =~ ^eipassoc- ]]; then + log -t user-data info "EIP $eip already associated via Association ID ${eip_assoc_id}. Disassociating." + retry 3 10 aws ec2 disassociate-address --association-id $eip_assoc_id + fi + + log -t user-data info "Associating Elastic IP $eip via Allocation ID $eip_alloc_id with Instance $instance_id" + aws ec2 associate-address --no-allow-reassociation --instance-id $instance_id --allocation-id $eip_alloc_id + return + + else + log -t user-data warn "Elastic IP $eip address not found." + fi + else + log -t user-data info "0.0.0.0 requested, keeping AWS assigned IP." + fi + else + log -t user-data debug "Invalid or no ElasticIP defined.
Skip" + fi + + return 1 +} + + + + +# Accept incoming traffic for everything +disable_source_dest_check() { + aws ec2 modify-instance-attribute --instance-id ${INSTANCE_ID} --source-dest-check "{\"Value\": false}" +} + + +# Register ourself at route tables +register_routes() { + local rtb_id_list=$1 + local route_cidr=${2:-"0.0.0.0/0"} + + for cidr in ${route_cidr//,/ }; do + for rt in ${rtb_id_list//,/ }; do + [[ "$rt" =~ ^rtb-[a-f0-9]*$ ]] || { log -t user-data warn "Invalid Route Table ID: $rt"; return 1; } + + aws ec2 create-route --route-table-id $rt --destination-cidr-block "${cidr}" --instance-id ${INSTANCE_ID} || \ + aws ec2 replace-route --route-table-id $rt --destination-cidr-block "${cidr}" --instance-id ${INSTANCE_ID} + done + done +} + + +setup_nat() { + local mode=$1 + + # Masquerade all outgoing traffic + iptables -t nat -A POSTROUTING -o $DEFAULT_GW_INTERFACE -s ${VPC_CIDR_RANGE} -j MASQUERADE +} + + +setup_fluentbit() { + local token="cloudbender" + + if [[ $FLUENTDURL == *@* ]]; then + token=${FLUENTDURL%%@*} + FLUENTD_URL=${FLUENTDURL##*@} + fi + LOG_FILES=$FLUENTDLOGFILES + + # Add a local file based syslog parser which does not require Priority + cat < /etc/fluent-bit/metadata.conf +# add some AWS metadata +[FILTER] + Name record_modifier + Match * + Record source.ip $IP_ADDRESS + Record source.instance_id $INSTANCE_ID + Record source.region $REGION + Record source.account $AWS_ACCOUNT_ID + Record source.conglomerate $CONGLOMERATE + Record source.artifact $ARTIFACT +EOF + + # install logrotate fragment + cat < /etc/logrotate.d/fluentbit +/var/log/fluentbit.log +{ + rotate 3 + missingok + notifempty + compress + maxsize 10M + daily + postrotate + rc-service fluent-bit restart + endscript +} +EOF + + rc-update add fluent-bit default + rc-service fluent-bit start +} diff --git a/kubezero/zdt-base/monit_alert.sh.aws b/kubezero/zdt-base/monit_alert.sh.aws index 636d77c..126f4c7 100755 --- a/kubezero/zdt-base/monit_alert.sh.aws +++ b/kubezero/zdt-base/monit_alert.sh.aws @@ -6,6 +6,6 @@ shift ATTACHMENT="$@" if [ -n "${MONIT_SERVICE}${MONIT_EVENT}" -a -n "$MONIT_DESCRIPTION" ]; then - [ -x /var/lib/cloudbender/sns_alarm.sh ] && \ - /var/lib/cloudbender/sns_alarm.sh "$MONIT_SERVICE - $MONIT_EVENT" "$MONIT_DESCRIPTION" "$LEVEL" "$ATTACHMENT" + [ -x /var/lib/cloud/sns_alarm.sh ] && \ + /var/lib/cloud/sns_alarm.sh "$MONIT_SERVICE - $MONIT_EVENT" "$MONIT_DESCRIPTION" "$LEVEL" "$ATTACHMENT" fi diff --git a/kubezero/zdt-base/monitrc b/kubezero/zdt-base/monitrc index 5743962..abec2e2 100644 --- a/kubezero/zdt-base/monitrc +++ b/kubezero/zdt-base/monitrc @@ -1,6 +1,6 @@ -# Give instance 3 min to settle after boot set daemon 30 - with start delay 180 +# add `for 2 cycles` might be better than this intial block +# with start delay 120 set log syslog diff --git a/kubezero/zdt-base/uniq_hostname.py b/kubezero/zdt-base/uniq_hostname.py index 587d2c0..1801582 100755 --- a/kubezero/zdt-base/uniq_hostname.py +++ b/kubezero/zdt-base/uniq_hostname.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # use pyminify: pyminifier --obfuscate-variables $0 > minified_$0 diff --git a/kubezero/zdt-base/zdt-base.post-install b/kubezero/zdt-base/zdt-base.post-install index c3afc58..615ecd0 100644 --- a/kubezero/zdt-base/zdt-base.post-install +++ b/kubezero/zdt-base/zdt-base.post-install @@ -13,12 +13,15 @@ echo 'enabled cgroupv2, openRC logging' #sed -i -e 's/^[\s#]*rc_parallel=.*/rc_parallel="YES"/' /etc/rc.conf #echo 'enable parallel openRC' +# load falco kernel module at boot +grep -q falco 
/etc/modules || echo falco >> /etc/modules + # Setup syslog-ng json logging and apparmor tweaks cp /lib/zdt/syslog-ng.conf /etc/syslog-ng/syslog-ng.conf cp /lib/zdt/syslog-ng.logrotate.conf /etc/logrotate.d/syslog-ng cp /lib/zdt/syslog-ng.apparmor /etc/apparmor.d/local/sbin.syslog-ng -mv /etc/periodic/daily/logrotate /etc/periodic/hourly/ +[ -f /etc/periodic/daily/logrotate ] && mv /etc/periodic/daily/logrotate /etc/periodic/hourly/ echo 'syslog-ng: all to /var/log/messages as json, rotate hourly' # use init to spawn monit @@ -26,5 +29,5 @@ echo ":2345:respawn:/usr/bin/monit -Ic /etc/monitrc.zdt" >> /etc/inittab echo 'Enable monit via inittab' # QoL -mv /etc/profile.d/color_prompt.sh.disabled /etc/profile.d/color_prompt.sh || true +[ -f /etc/profile.d/color_prompt.sh.disabled ] && mv /etc/profile.d/color_prompt.sh.disabled /etc/profile.d/color_prompt.sh || true echo 'alias rs="doas bash"' > /etc/profile.d/alias.sh diff --git a/scripts/rebuild_new_kernel.sh b/scripts/rebuild_new_kernel.sh new file mode 100755 index 0000000..43b51ba --- /dev/null +++ b/scripts/rebuild_new_kernel.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +PACKETS="falco-kernel nvidia-open-gpu aws-neuron-driver" + +for p in $PACKETS; do + rm -f packages/kubezero/*/$p*.apk + make apk PKG=kubezero/$p/APKBUILD +done + +make upload