From 2da561ba5861917075296ecb8ca3714beedaeb1f Mon Sep 17 00:00:00 2001
From: Stefan Reimer
Date: Wed, 15 Dec 2021 23:19:15 +0100
Subject: [PATCH] feat: new etcd setup, drop kubeadm Join

---
 containers/admin/Makefile                        |  4 +-
 containers/admin/v1.21/kubeadm/README.md         |  5 +-
 .../templates/ClusterConfiguration.yaml          | 21 ++--
 .../kubeadm/templates/InitConfiguration.yaml     |  8 +-
 .../kubeadm/templates/JoinConfiguration.yaml     | 24 -----
 .../v1.21/kubeadm/templates/_helpers.tpl         | 14 ++-
 containers/admin/v1.21/kubeadm/values.yaml       |  5 +-
 containers/admin/v1.21/kubezero.sh               | 97 ++++++++++++-------
 containers/admin/v1.21/kubezero_121.sh           |  4 +-
 9 files changed, 105 insertions(+), 77 deletions(-)
 delete mode 100644 containers/admin/v1.21/kubeadm/templates/JoinConfiguration.yaml

diff --git a/containers/admin/Makefile b/containers/admin/Makefile
index c20ec79..04ad919 100644
--- a/containers/admin/Makefile
+++ b/containers/admin/Makefile
@@ -7,7 +7,7 @@ KUBE_VERSION := $(shell echo $(VERSION) | sed -e 's/\.[[:digit:]]*$$//')
 
 .PHONY: build push clean scan
 
-all: clean build push
+all: build push
 
 build:
 	podman build --rm --squash-all --build-arg KUBE_VERSION=$(KUBE_VERSION) --build-arg ALPINE_VERSION=$(ALPINE_VERSION) -t $(TAG) .
@@ -18,7 +18,7 @@ push:
 	podman push $(REGISTRY)/$(TAG)
 
 clean:
-	podman rmi -f $(TAG)
+	podman image prune -f
 
 scan:
 	podman system service&

diff --git a/containers/admin/v1.21/kubeadm/README.md b/containers/admin/v1.21/kubeadm/README.md
index c893431..613538a 100644
--- a/containers/admin/v1.21/kubeadm/README.md
+++ b/containers/admin/v1.21/kubeadm/README.md
@@ -25,12 +25,12 @@ Kubernetes: `>= 1.20.0`
 | addons.clusterBackup.enabled | bool | `false` | |
 | addons.clusterBackup.passwordFile | string | `""` | /etc/cloudbender/clusterBackup.passphrase |
 | addons.clusterBackup.repository | string | `""` | s3:https://s3.amazonaws.com/${CFN[ConfigBucket]}/k8s/${CLUSTERNAME}/clusterBackup |
-| api.allEtcdEndpoints | string | `""` | |
 | api.apiAudiences | string | `"istio-ca"` | |
 | api.awsIamAuth.enabled | bool | `false` | |
 | api.awsIamAuth.kubeAdminRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
 | api.awsIamAuth.workerNodeRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
 | api.endpoint | string | `"kube-api.changeme.org:6443"` | |
+| api.etcdServers | string | `"https://localhost:2379"` | |
 | api.extraArgs | object | `{}` | |
 | api.listenPort | int | `6443` | |
 | api.oidcEndpoint | string | `""` | s3://${CFN[ConfigBucket]}/k8s/$CLUSTERNAME |
@@ -38,7 +38,8 @@ Kubernetes: `>= 1.20.0`
 | clusterName | string | `"pleasechangeme"` | |
 | domain | string | `"changeme.org"` | |
 | etcd.extraArgs | object | `{}` | |
-| etcd.nodeName | string | `"set_via_cmdline"` | |
+| etcd.nodeName | string | `"etcd"` | |
+| etcd.state | string | `"new"` | |
 | highAvailable | bool | `false` | |
 | listenAddress | string | `"0.0.0.0"` | Needs to be set to primary node IP |
 | network.calico.enabled | bool | `false` | |

diff --git a/containers/admin/v1.21/kubeadm/templates/ClusterConfiguration.yaml b/containers/admin/v1.21/kubeadm/templates/ClusterConfiguration.yaml
index a7c5fc1..78ca579 100644
--- a/containers/admin/v1.21/kubeadm/templates/ClusterConfiguration.yaml
+++ b/containers/admin/v1.21/kubeadm/templates/ClusterConfiguration.yaml
@@ -11,22 +11,25 @@ etcd:
       ### DNS discovery
       #discovery-srv: {{ .Values.domain }}
       #discovery-srv-name: {{ .Values.clusterName }}
-      #initial-cluster:
+      advertise-client-urls: https://{{ .Values.etcd.nodeName }}:2379
+      initial-advertise-peer-urls: https://{{ .Values.etcd.nodeName }}:2380
+      initial-cluster: {{ include "kubeadm.etcd.initialCluster" .Values.etcd | quote }}
+      initial-cluster-state: {{ .Values.etcd.state }}
       initial-cluster-token: etcd-{{ .Values.clusterName }}
-      listen-metrics-urls: "http://0.0.0.0:2381"
-      logger: "zap"
+      name: {{ .Values.etcd.nodeName }}
+      listen-peer-urls: https://{{ .Values.listenAddress }}:2380
+      listen-client-urls: https://{{ .Values.listenAddress }}:2379
+      listen-metrics-urls: http://0.0.0.0:2381
+      logger: zap
       # log-level: "warn"
       {{- with .Values.etcd.extraArgs }}
       {{- toYaml . | nindent 6 }}
       {{- end }}
-  # These will only be used to create the etcd certs but removed for Init/Join kudeadm calls allowing us to sneak in aliases for etcd nodes
     serverCertSANs:
-      - "{{ .Values.listenAddress }}"
       - "{{ .Values.etcd.nodeName }}"
       - "{{ .Values.etcd.nodeName }}.{{ .Values.domain }}"
       - "{{ .Values.domain }}"
     peerCertSANs:
-      - "{{ .Values.listenAddress }}"
       - "{{ .Values.etcd.nodeName }}"
       - "{{ .Values.etcd.nodeName }}.{{ .Values.domain }}"
       - "{{ .Values.domain }}"
@@ -34,20 +37,20 @@ controllerManager:
   extraArgs:
     profiling: "false"
     terminated-pod-gc-threshold: "300"
-    # leader-elect: {{ .Values.highAvailable | quote }}
+    leader-elect: {{ .Values.highAvailable | quote }}
     logging-format: json
     feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
 scheduler:
   extraArgs:
     profiling: "false"
-    # leader-elect: {{ .Values.highAvailable | quote }}
+    leader-elect: {{ .Values.highAvailable | quote }}
     logging-format: json
     feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
 apiServer:
   certSANs:
     - {{ regexSplit ":" .Values.api.endpoint -1 | first }}
   extraArgs:
-    etcd-servers: {{ .Values.api.allEtcdEndpoints }}
+    etcd-servers: {{ .Values.api.etcdServers }}
     profiling: "false"
     audit-log-path: "/var/log/kubernetes/audit.log"
     audit-policy-file: /etc/kubernetes/apiserver/audit-policy.yaml

diff --git a/containers/admin/v1.21/kubeadm/templates/InitConfiguration.yaml b/containers/admin/v1.21/kubeadm/templates/InitConfiguration.yaml
index 41fa1c1..908f86f 100644
--- a/containers/admin/v1.21/kubeadm/templates/InitConfiguration.yaml
+++ b/containers/admin/v1.21/kubeadm/templates/InitConfiguration.yaml
@@ -5,11 +5,17 @@ localAPIEndpoint:
   bindPort: {{ .Values.api.listenPort }}
 nodeRegistration:
   ignorePreflightErrors:
-    - Swap
     - DirAvailable--var-lib-etcd
+    - DirAvailable--etc-kubernetes-manifests
+    - FileAvailable--etc-kubernetes-pki-ca.crt
+    - FileAvailable--etc-kubernetes-manifests-etcd.yaml
+    - Swap
     - KubeletVersion
   kubeletExtraArgs:
     node-labels: {{ .Values.nodeLabels | quote }}
   {{- with .Values.providerID }}
     provider-id: {{ . }}
   {{- end }}
+  {{- if ne .Values.listenAddress "0.0.0.0" }}
+    node-ip: {{ .Values.listenAddress }}
+  {{- end }}

diff --git a/containers/admin/v1.21/kubeadm/templates/JoinConfiguration.yaml b/containers/admin/v1.21/kubeadm/templates/JoinConfiguration.yaml
deleted file mode 100644
index 7582409..0000000
--- a/containers/admin/v1.21/kubeadm/templates/JoinConfiguration.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-# This is for controllers only, workers dont use kubeadm
-apiVersion: kubeadm.k8s.io/v1beta2
-kind: JoinConfiguration
-discovery:
-  file:
-    kubeConfigPath: /root/.kube/config
-controlPlane:
-  localAPIEndpoint:
-    advertiseAddress: {{ .Values.listenAddress }}
-    bindPort: {{ .Values.api.listenPort }}
-nodeRegistration:
-  ignorePreflightErrors:
-    - DirAvailable--var-lib-etcd
-    - FileAvailable--etc-kubernetes-pki-ca.crt
-    - Swap
-    - KubeletVersion
-  kubeletExtraArgs:
-    node-labels: {{ .Values.nodeLabels | quote }}
-  {{- with .Values.providerID }}
-    provider-id: {{ . }}
-  {{- end }}
-  {{- if ne .Values.listenAddress "0.0.0.0" }}
-    node-ip: {{ .Values.listenAddress }}
-  {{- end }}

diff --git a/containers/admin/v1.21/kubeadm/templates/_helpers.tpl b/containers/admin/v1.21/kubeadm/templates/_helpers.tpl
index e41eedd..19b2811 100644
--- a/containers/admin/v1.21/kubeadm/templates/_helpers.tpl
+++ b/containers/admin/v1.21/kubeadm/templates/_helpers.tpl
@@ -1,6 +1,4 @@
-{{- /*
-Feature gates for all control plane components
-*/ -}}
+{{- /* Feature gates for all control plane components */ -}}
 {{- define "kubeadm.featuregates" -}}
 {{- $gates := list "CustomCPUCFSQuotaPeriod" "GenericEphemeralVolume" "InTreePluginAWSUnregister" "InTreePluginAzureDiskUnregister" "InTreePluginAzureFileUnregister" "InTreePluginGCEUnregister" "InTreePluginOpenStackUnregister" }}
 {{- if eq .return "csv" }}
@@ -13,3 +11,13 @@ Feature gates for all control plane components
 {{- end }}
 {{- end }}
 {{- end -}}
+
+
+{{- /* Etcd default initial cluster */ -}}
+{{- define "kubeadm.etcd.initialCluster" -}}
+{{- if .initialCluster -}}
+{{ .initialCluster }}
+{{- else -}}
+{{ .nodeName }}=https://{{ .nodeName }}:2380
+{{- end -}}
+{{- end -}}

diff --git a/containers/admin/v1.21/kubeadm/values.yaml b/containers/admin/v1.21/kubeadm/values.yaml
index 107e34d..2086ce5 100644
--- a/containers/admin/v1.21/kubeadm/values.yaml
+++ b/containers/admin/v1.21/kubeadm/values.yaml
@@ -9,7 +9,7 @@ listenAddress: 0.0.0.0
 api:
   endpoint: kube-api.changeme.org:6443
   listenPort: 6443
-  allEtcdEndpoints: ""
+  etcdServers: "https://localhost:2379"
   extraArgs: {}
   # -- https://s3.${REGION}.amazonaws.com/${CFN[ConfigBucket]}/k8s/$CLUSTERNAME
   serviceAccountIssuer: ""
@@ -47,7 +47,8 @@ network:
 highAvailable: false
 
 etcd:
-  nodeName: set_via_cmdline
+  nodeName: etcd
+  state: new
   extraArgs: {}
 
 # -- Set to false for openrc, e.g. on Gentoo or Alpine
diff --git a/containers/admin/v1.21/kubezero.sh b/containers/admin/v1.21/kubezero.sh
index 3e9fd31..ddc6ce0 100755
--- a/containers/admin/v1.21/kubezero.sh
+++ b/containers/admin/v1.21/kubezero.sh
@@ -7,6 +7,12 @@ VERSION=v1.21
 
 export KUBECONFIG="${HOSTFS}/root/.kube/config"
 
+# etcd
+export ETCDCTL_API=3
+export ETCDCTL_CACERT=${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt
+export ETCDCTL_CERT=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.crt
+export ETCDCTL_KEY=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.key
+
 if [ -n "$DEBUG" ]; then
   set -x
   LOG="--v=5"
@@ -27,24 +33,22 @@ retry() {
 }
 
 
+_kubeadm() {
+  kubeadm $@ --config /etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS} $LOG
+}
+
+
 # Render cluster config
 render_kubeadm() {
   helm template /opt/kubeadm --output-dir ${WORKDIR} -f ${HOSTFS}/etc/kubernetes/kubezero.yaml
 
   # Assemble kubeadm config
-  cat /dev/null > ${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml
+  cat /dev/null > ${HOSTFS}/etc/kubernetes/kubeadm.yaml
   for f in Cluster Init KubeProxy Kubelet; do
-    # echo "---" >> /etc/kubernetes/kubeadm-etcd.yaml
-    cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml
+    # echo "---" >> /etc/kubernetes/kubeadm.yaml
+    cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
   done
 
-  # Remove etcd custom cert entries from final kubeadm config
-  yq eval 'del(.etcd.local.serverCertSANs) | del(.etcd.local.peerCertSANs)' \
-    ${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml > ${HOSTFS}/etc/kubernetes/kubeadm.yaml
-
-  # Copy JoinConfig
-  cp ${WORKDIR}/kubeadm/templates/JoinConfiguration.yaml ${HOSTFS}/etc/kubernetes
-
   # hack to "uncloak" the json patches after they get processed by helm
   for s in apiserver controller-manager scheduler; do
     yq eval '.json' ${WORKDIR}/kubeadm/templates/patches/kube-${s}1\+json.yaml > /tmp/_tmp.yaml && \
@@ -58,7 +62,7 @@ parse_kubezero() {
   KUBE_VERSION=$(kubeadm version -o yaml | yq eval .clientVersion.gitVersion -)
 
   CLUSTERNAME=$(yq eval '.clusterName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
-  NODENAME=$(yq eval '.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
+  ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
 
   AWS_IAM_AUTH=$(yq eval '.api.awsIamAuth.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
   AWS_NTH=$(yq eval '.addons.aws-node-termination-handler.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
@@ -125,8 +129,7 @@ if [ "$1" == 'upgrade' ]; then
   pre_kubeadm
 
   # Upgrade
-  kubeadm upgrade apply --config /etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS} \
-    --experimental-patches /tmp/patches $LOG -y
+  _kubeadm upgrade apply -y --experimental-patches /tmp/patches
 
   post_kubeadm
 
@@ -187,6 +190,7 @@ elif [[ "$1" == 'node-upgrade' ]]; then
 
   # enable backup and awsIamAuth, multus; match other reorg
  yq -Mi e '.api.awsIamAuth.enabled = "true" | .api.awsIamAuth.workerNodeRole = .workerNodeRole | .api.awsIamAuth.kubeAdminRole = .kubeAdminRole
    | .api.serviceAccountIssuer = .serviceAccountIssuer | .api.apiAudiences = "istio-ca,sts.amazonaws.com"
+   | .api.etcdServers = .api.allEtcdEndpoints
    | .network.multus.enabled = "true"
    | .addons.clusterBackup.enabled = "true" | .addons.clusterBackup.repository = strenv(restic_repo) | .addons.clusterBackup.password = strenv(restic_pw)
    | .addons.clusterBackup.extraEnv[0].name = "AWS_DEFAULT_REGION" | .addons.clusterBackup.extraEnv[0].value = strenv(REGION)
@@ -202,33 +206,66 @@ elif [[ "$1" =~ "^(bootstrap|recover|join)$" ]]; then
 
     # Recert certificates for THIS node
     rm -f ${HOSTFS}/etc/kubernetes/pki/etcd/peer.* ${HOSTFS}/etc/kubernetes/pki/etcd/server.* ${HOSTFS}/etc/kubernetes/pki/apiserver.*
-    kubeadm init phase certs etcd-server --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
-    kubeadm init phase certs etcd-peer --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
-    kubeadm init phase certs apiserver --config=/etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS}
+    _kubeadm init phase certs etcd-server
+    _kubeadm init phase certs etcd-peer
+    _kubeadm init phase certs apiserver
 
     # Restore only etcd for disaster recovery
     if [[ "$1" =~ "^(recover)$" ]]; then
-      etcdctl snapshot restore ${HOSTFS}/etc/kubernetes \
-        --name $NODENAME \
+      etcdctl snapshot restore ${HOSTFS}/etc/kubernetes/etcd_snapshot \
+        --name $ETCD_NODENAME \
         --data-dir="${HOSTFS}/var/lib/etcd" \
-        --initial-cluster-token ${CLUSTERNAME} \
-        --initial-advertise-peer-urls https://${NODENAME}:2380 \
-        --initial-cluster $NODENAME=https://${NODENAME}:2380
+        --initial-cluster-token etcd-${CLUSTERNAME} \
+        --initial-advertise-peer-urls https://${ETCD_NODENAME}:2380 \
+        --initial-cluster $ETCD_NODENAME=https://${ETCD_NODENAME}:2380
     fi
 
   # Create all certs during bootstrap
   else
-    kubeadm init phase certs all --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
+    _kubeadm init phase certs all
  fi
 
  pre_kubeadm
 
  if [[ "$1" =~ "^(join)$" ]]; then
-    kubeadm join --config /etc/kubernetes/JoinConfiguration.yaml --rootfs ${HOSTFS} \
-      --experimental-patches /tmp/patches $LOG
+
+    _kubeadm init phase preflight
+    _kubeadm init phase kubeconfig all
+    _kubeadm init phase kubelet-start
+
+    # First get the currently running etcd pods for the etcdctl commands;
+    # retry in case other nodes join, the API fails, the etcd leader changes, etc.
+    while true; do
+      etcd_endpoints=$(kubectl get pods -n kube-system -l component=etcd -o yaml | \
+        yq eval '.items[].metadata.annotations."kubeadm.kubernetes.io/etcd.advertise-client-urls"' - | tr '\n' ',' | sed -e 's/,$//')
+      [[ $etcd_endpoints =~ ^https:// ]] && break
+      sleep 3
+    done
+
+    # Is our $ETCD_NODENAME already in the etcd cluster?
+    # Remove former self first
+    MY_ID=$(etcdctl member list --endpoints=$etcd_endpoints | grep $ETCD_NODENAME | awk '{print $1}' | sed -e 's/,$//')
+    [ -n "$MY_ID" ] && retry 12 5 5 etcdctl member remove $MY_ID --endpoints=$etcd_endpoints
+
+    # Announce the new etcd member and capture ETCD_INITIAL_CLUSTER; retry needed as another node joining can cause temporary quorum loss
+    ETCD_ENVS=$(retry 12 5 5 etcdctl member add $ETCD_NODENAME --peer-urls="https://${ETCD_NODENAME}:2380" --endpoints=$etcd_endpoints)
+    export $(echo "$ETCD_ENVS" | grep ETCD_INITIAL_CLUSTER= | sed -e 's/"//g')
+
+    # Patch kubezero.yaml and re-render to get the etcd manifest patched
+    yq eval -i '.etcd.state = "existing"
+      | .etcd.initialCluster = strenv(ETCD_INITIAL_CLUSTER)
+      ' ${HOSTFS}/etc/kubernetes/kubezero.yaml
+    render_kubeadm
+
+    # Generate our advanced etcd yaml
+    _kubeadm init phase etcd local --experimental-patches /tmp/patches
+
+    _kubeadm init phase control-plane all --experimental-patches /tmp/patches
+    _kubeadm init phase mark-control-plane
+    _kubeadm init phase kubelet-finalize all
+
  else
-    kubeadm init --config /etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS} \
-      --experimental-patches /tmp/patches --skip-token-print $LOG
+    _kubeadm init --experimental-patches /tmp/patches --skip-token-print
  fi
 
  cp ${HOSTFS}/etc/kubernetes/admin.conf ${HOSTFS}/root/.kube/config
@@ -273,13 +310,7 @@ elif [ "$1" == 'backup' ]; then
   restic snapshots || restic init || exit 1
 
-  # etcd
-  export ETCDCTL_API=3
-  export ETCDCTL_CACERT=${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt
-  export ETCDCTL_CERT=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.crt
-  export ETCDCTL_KEY=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.key
-
-  etcdctl --endpoints=https://localhost:2379 snapshot save ${WORKDIR}/etcd_snapshot
+  etcdctl --endpoints=https://${ETCD_NODENAME}:2379 snapshot save ${WORKDIR}/etcd_snapshot
 
   # pki & cluster-admin access
   cp -r ${HOSTFS}/etc/kubernetes/pki ${WORKDIR}

diff --git a/containers/admin/v1.21/kubezero_121.sh b/containers/admin/v1.21/kubezero_121.sh
index 649901c..70d40c8 100755
--- a/containers/admin/v1.21/kubezero_121.sh
+++ b/containers/admin/v1.21/kubezero_121.sh
@@ -2,7 +2,7 @@
 set -x
 
 # Allow EFS and EBS Argo apps to be deleted without removing things like storageClasses etc.
-# to be replaced by kubezero-storage
+# all to be replaced by kubezero-storage
 kubectl patch application aws-ebs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
 kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
 
@@ -10,6 +10,8 @@ kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op": 
 # This will NOT affect provisioned volumes
 kubectl delete deployment ebs-csi-controller -n kube-system
 kubectl delete daemonSet ebs-csi-node -n kube-system
+
+# Snapshot controller was removed from the EBS chart
 kubectl delete statefulset ebs-snapshot-controller -n kube-system
 
 kubectl delete deployment efs-csi-controller -n kube-system
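
Note on the join flow above: a joining control plane no longer runs `kubeadm join`; it swaps
itself into the etcd cluster via etcdctl and then runs the individual `kubeadm init` phases.
A minimal standalone sketch of that member-replace sequence, assuming the ETCDCTL_* client-cert
variables exported at the top of kubezero.sh are set; the node name "etcd0" and the endpoint
list are illustrative:

    # Discover live client URLs from the kubeadm annotation on the running etcd pods
    etcd_endpoints=$(kubectl get pods -n kube-system -l component=etcd -o yaml | \
      yq eval '.items[].metadata.annotations."kubeadm.kubernetes.io/etcd.advertise-client-urls"' - | \
      tr '\n' ',' | sed -e 's/,$//')

    # Drop any stale member registered under our name ("member list" prints "ID, status, name, ...")
    old_id=$(etcdctl member list --endpoints=$etcd_endpoints | grep etcd0 | awk '{print $1}' | sed -e 's/,$//')
    [ -n "$old_id" ] && etcdctl member remove $old_id --endpoints=$etcd_endpoints

    # Re-announce ourselves; "member add" prints ETCD_INITIAL_CLUSTER=..., which seeds the
    # initial-cluster / initial-cluster-state=existing settings of the new member's manifest
    etcdctl member add etcd0 --peer-urls="https://etcd0:2380" --endpoints=$etcd_endpoints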