feat: new etcd setup, drop kubeadm Join

This commit is contained in:
Stefan Reimer 2021-12-15 23:19:15 +01:00
parent cd56f0a7aa
commit 7ff4bc9491
9 changed files with 105 additions and 77 deletions

View File

@ -7,7 +7,7 @@ KUBE_VERSION := $(shell echo $(VERSION) | sed -e 's/\.[[:digit:]]*$$//')
.PHONY: build push clean scan
all: clean build push
all: build push
build:
podman build --rm --squash-all --build-arg KUBE_VERSION=$(KUBE_VERSION) --build-arg ALPINE_VERSION=$(ALPINE_VERSION) -t $(TAG) .
@ -18,7 +18,7 @@ push:
podman push $(REGISTRY)/$(TAG)
clean:
podman rmi -f $(TAG)
podman image prune -f
scan:
podman system service&

View File

@ -25,12 +25,12 @@ Kubernetes: `>= 1.20.0`
| addons.clusterBackup.enabled | bool | `false` | |
| addons.clusterBackup.passwordFile | string | `""` | /etc/cloudbender/clusterBackup.passphrase |
| addons.clusterBackup.repository | string | `""` | s3:https://s3.amazonaws.com/${CFN[ConfigBucket]}/k8s/${CLUSTERNAME}/clusterBackup |
| api.allEtcdEndpoints | string | `""` | |
| api.apiAudiences | string | `"istio-ca"` | |
| api.awsIamAuth.enabled | bool | `false` | |
| api.awsIamAuth.kubeAdminRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
| api.awsIamAuth.workerNodeRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
| api.endpoint | string | `"kube-api.changeme.org:6443"` | |
| api.etcdServers | string | `"https://localhost:2379"` | |
| api.extraArgs | object | `{}` | |
| api.listenPort | int | `6443` | |
| api.oidcEndpoint | string | `""` | s3://${CFN[ConfigBucket]}/k8s/$CLUSTERNAME |
@ -38,7 +38,8 @@ Kubernetes: `>= 1.20.0`
| clusterName | string | `"pleasechangeme"` | |
| domain | string | `"changeme.org"` | |
| etcd.extraArgs | object | `{}` | |
| etcd.nodeName | string | `"set_via_cmdline"` | |
| etcd.nodeName | string | `"etcd"` | |
| etcd.state | string | `"new"` | |
| highAvailable | bool | `false` | |
| listenAddress | string | `"0.0.0.0"` | Needs to be set to primary node IP |
| network.calico.enabled | bool | `false` | |

View File

@ -11,22 +11,25 @@ etcd:
### DNS discovery
#discovery-srv: {{ .Values.domain }}
#discovery-srv-name: {{ .Values.clusterName }}
#initial-cluster:
advertise-client-urls: https://{{ .Values.etcd.nodeName }}:2379
initial-advertise-peer-urls: https://{{ .Values.etcd.nodeName }}:2380
initial-cluster: {{ include "kubeadm.etcd.initialCluster" .Values.etcd | quote }}
initial-cluster-state: {{ .Values.etcd.state }}
initial-cluster-token: etcd-{{ .Values.clusterName }}
listen-metrics-urls: "http://0.0.0.0:2381"
logger: "zap"
name: {{ .Values.etcd.nodeName }}
listen-peer-urls: https://{{ .Values.listenAddress }}:2380
listen-client-urls: https://{{ .Values.listenAddress }}:2379
listen-metrics-urls: http://0.0.0.0:2381
logger: zap
# log-level: "warn"
{{- with .Values.etcd.extraArgs }}
{{- toYaml . | nindent 6 }}
{{- end }}
# These will only be used to create the etcd certs but are removed for the Init/Join kubeadm calls, allowing us to sneak in aliases for etcd nodes
serverCertSANs:
- "{{ .Values.listenAddress }}"
- "{{ .Values.etcd.nodeName }}"
- "{{ .Values.etcd.nodeName }}.{{ .Values.domain }}"
- "{{ .Values.domain }}"
peerCertSANs:
- "{{ .Values.listenAddress }}"
- "{{ .Values.etcd.nodeName }}"
- "{{ .Values.etcd.nodeName }}.{{ .Values.domain }}"
- "{{ .Values.domain }}"
@ -34,20 +37,20 @@ controllerManager:
extraArgs:
profiling: "false"
terminated-pod-gc-threshold: "300"
# leader-elect: {{ .Values.highAvailable | quote }}
leader-elect: {{ .Values.highAvailable | quote }}
logging-format: json
feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
scheduler:
extraArgs:
profiling: "false"
# leader-elect: {{ .Values.highAvailable | quote }}
leader-elect: {{ .Values.highAvailable | quote }}
logging-format: json
feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
apiServer:
certSANs:
- {{ regexSplit ":" .Values.api.endpoint -1 | first }}
extraArgs:
etcd-servers: {{ .Values.api.allEtcdEndpoints }}
etcd-servers: {{ .Values.api.etcdServers }}
profiling: "false"
audit-log-path: "/var/log/kubernetes/audit.log"
audit-policy-file: /etc/kubernetes/apiserver/audit-policy.yaml

View File

@ -5,11 +5,17 @@ localAPIEndpoint:
bindPort: {{ .Values.api.listenPort }}
nodeRegistration:
ignorePreflightErrors:
- Swap
- DirAvailable--var-lib-etcd
- DirAvailable--etc-kubernetes-manifests
- FileAvailable--etc-kubernetes-pki-ca.crt
- FileAvailable--etc-kubernetes-manifests-etcd.yaml
- Swap
- KubeletVersion
kubeletExtraArgs:
node-labels: {{ .Values.nodeLabels | quote }}
{{- with .Values.providerID }}
provider-id: {{ . }}
{{- end }}
{{- if ne .Values.listenAddress "0.0.0.0" }}
node-ip: {{ .Values.listenAddress }}
{{- end }}

View File

@ -1,24 +0,0 @@
# This is for controllers only, workers don't use kubeadm
apiVersion: kubeadm.k8s.io/v1beta2
kind: JoinConfiguration
discovery:
file:
kubeConfigPath: /root/.kube/config
controlPlane:
localAPIEndpoint:
advertiseAddress: {{ .Values.listenAddress }}
bindPort: {{ .Values.api.listenPort }}
nodeRegistration:
ignorePreflightErrors:
- DirAvailable--var-lib-etcd
- FileAvailable--etc-kubernetes-pki-ca.crt
- Swap
- KubeletVersion
kubeletExtraArgs:
node-labels: {{ .Values.nodeLabels | quote }}
{{- with .Values.providerID }}
provider-id: {{ . }}
{{- end }}
{{- if ne .Values.listenAddress "0.0.0.0" }}
node-ip: {{ .Values.listenAddress }}
{{- end }}

View File

@ -1,6 +1,4 @@
{{- /*
Feature gates for all control plane components
*/ -}}
{{- /* Feature gates for all control plane components */ -}}
{{- define "kubeadm.featuregates" -}}
{{- $gates := list "CustomCPUCFSQuotaPeriod" "GenericEphemeralVolume" "InTreePluginAWSUnregister" "InTreePluginAzureDiskUnregister" "InTreePluginAzureFileUnregister" "InTreePluginGCEUnregister" "InTreePluginOpenStackUnregister" }}
{{- if eq .return "csv" }}
@ -13,3 +11,13 @@ Feature gates for all control plane components
{{- end }}
{{- end }}
{{- end -}}
{{- /*
Etcd initial cluster string.
Expects the etcd values map as context: an explicitly set .initialCluster is
emitted verbatim, otherwise a single-member cluster is derived from .nodeName.
*/ -}}
{{- define "kubeadm.etcd.initialCluster" -}}
{{- with .initialCluster -}}
{{ . }}
{{- else -}}
{{- $member := .nodeName -}}
{{ $member }}=https://{{ $member }}:2380
{{- end -}}
{{- end -}}

View File

@ -9,7 +9,7 @@ listenAddress: 0.0.0.0
api:
endpoint: kube-api.changeme.org:6443
listenPort: 6443
allEtcdEndpoints: ""
etcdServers: "https://localhost:2379"
extraArgs: {}
# -- https://s3.${REGION}.amazonaws.com/${CFN[ConfigBucket]}/k8s/$CLUSTERNAME
serviceAccountIssuer: ""
@ -47,7 +47,8 @@ network:
highAvailable: false
etcd:
nodeName: set_via_cmdline
nodeName: etcd
state: new
extraArgs: {}
# -- Set to false for openrc, eg. on Gentoo or Alpine

View File

@ -7,6 +7,12 @@ VERSION=v1.21
export KUBECONFIG="${HOSTFS}/root/.kube/config"
# etcd
export ETCDCTL_API=3
export ETCDCTL_CACERT=${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt
export ETCDCTL_CERT=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.crt
export ETCDCTL_KEY=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.key
if [ -n "$DEBUG" ]; then
set -x
LOG="--v=5"
@ -27,24 +33,22 @@ retry() {
}
# Wrapper around kubeadm: forwards the caller's arguments unchanged and appends
# the shared --config / --rootfs options plus the optional verbosity flag.
_kubeadm() {
  # "$@" keeps each caller argument intact (no word splitting or globbing).
  # $LOG is left unquoted on purpose so that an empty/unset value adds no
  # argument at all (it is only set to "--v=5" in the DEBUG branch).
  kubeadm "$@" --config /etc/kubernetes/kubeadm.yaml --rootfs "${HOSTFS}" $LOG
}
# Render cluster config
render_kubeadm() {
helm template /opt/kubeadm --output-dir ${WORKDIR} -f ${HOSTFS}/etc/kubernetes/kubezero.yaml
# Assemble kubeadm config
cat /dev/null > ${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml
cat /dev/null > ${HOSTFS}/etc/kubernetes/kubeadm.yaml
for f in Cluster Init KubeProxy Kubelet; do
# echo "---" >> /etc/kubernetes/kubeadm-etcd.yaml
cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml
# echo "---" >> /etc/kubernetes/kubeadm.yaml
cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
done
# Remove etcd custom cert entries from final kubeadm config
yq eval 'del(.etcd.local.serverCertSANs) | del(.etcd.local.peerCertSANs)' \
${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml > ${HOSTFS}/etc/kubernetes/kubeadm.yaml
# Copy JoinConfig
cp ${WORKDIR}/kubeadm/templates/JoinConfiguration.yaml ${HOSTFS}/etc/kubernetes
# hack to "uncloak" the json patches after they got processed by helm
for s in apiserver controller-manager scheduler; do
yq eval '.json' ${WORKDIR}/kubeadm/templates/patches/kube-${s}1\+json.yaml > /tmp/_tmp.yaml && \
@ -58,7 +62,7 @@ parse_kubezero() {
KUBE_VERSION=$(kubeadm version -o yaml | yq eval .clientVersion.gitVersion -)
CLUSTERNAME=$(yq eval '.clusterName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
NODENAME=$(yq eval '.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
AWS_IAM_AUTH=$(yq eval '.api.awsIamAuth.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
AWS_NTH=$(yq eval '.addons.aws-node-termination-handler.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
@ -125,8 +129,7 @@ if [ "$1" == 'upgrade' ]; then
pre_kubeadm
# Upgrade
kubeadm upgrade apply --config /etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS} \
--experimental-patches /tmp/patches $LOG -y
_kubeadm upgrade apply -y --experimental-patches /tmp/patches
post_kubeadm
@ -187,6 +190,7 @@ elif [[ "$1" == 'node-upgrade' ]]; then
# enable backup, awsIamAuth and multus; match other reorg
yq -Mi e '.api.awsIamAuth.enabled = "true" | .api.awsIamAuth.workerNodeRole = .workerNodeRole | .api.awsIamAuth.kubeAdminRole = .kubeAdminRole
| .api.serviceAccountIssuer = .serviceAccountIssuer | .api.apiAudiences = "istio-ca,sts.amazonaws.com"
| .api.etcdServers = .api.allEtcdEndpoints
| .network.multus.enabled = "true"
| .addons.clusterBackup.enabled = "true" | .addons.clusterBackup.repository = strenv(restic_repo) | .addons.clusterBackup.password = strenv(restic_pw)
| .addons.clusterBackup.extraEnv[0].name = "AWS_DEFAULT_REGION" | .addons.clusterBackup.extraEnv[0].value = strenv(REGION)
@ -202,33 +206,66 @@ elif [[ "$1" =~ "^(bootstrap|recover|join)$" ]]; then
# Recert certificates for THIS node
rm -f ${HOSTFS}/etc/kubernetes/pki/etcd/peer.* ${HOSTFS}/etc/kubernetes/pki/etcd/server.* ${HOSTFS}/etc/kubernetes/pki/apiserver.*
kubeadm init phase certs etcd-server --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
kubeadm init phase certs etcd-peer --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
kubeadm init phase certs apiserver --config=/etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS}
_kubeadm init phase certs etcd-server
_kubeadm init phase certs etcd-peer
_kubeadm init phase certs apiserver
# Restore only etcd for disaster recovery
if [[ "$1" =~ "^(recover)$" ]]; then
etcdctl snapshot restore ${HOSTFS}/etc/kubernetes \
--name $NODENAME \
etcdctl snapshot restore ${HOSTFS}/etc/kubernetes/etcd_snapshot \
--name $ETCD_NODENAME \
--data-dir="${HOSTFS}/var/lib/etcd" \
--initial-cluster-token ${CLUSTERNAME} \
--initial-advertise-peer-urls https://${NODENAME}:2380 \
--initial-cluster $NODENAME=https://${NODENAME}:2380
--initial-cluster-token etcd-${CLUSTERNAME} \
--initial-advertise-peer-urls https://${ETCD_NODENAME}:2380 \
--initial-cluster $ETCD_NODENAME=https://${ETCD_NODENAME}:2380
fi
# Create all certs during bootstrap
else
kubeadm init phase certs all --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
_kubeadm init phase certs all
fi
pre_kubeadm
if [[ "$1" =~ "^(join)$" ]]; then
kubeadm join --config /etc/kubernetes/JoinConfiguration.yaml --rootfs ${HOSTFS} \
--experimental-patches /tmp/patches $LOG
_kubeadm init phase preflight
_kubeadm init phase kubeconfig all
_kubeadm init phase kubelet-start
# first get current running etcd pods for etcdctl commands
# retry in case other nodes join / API fails / etcd leader changes etc.
while true; do
etcd_endpoints=$(kubectl get pods -n kube-system -l component=etcd -o yaml | \
yq eval '.items[].metadata.annotations."kubeadm.kubernetes.io/etcd.advertise-client-urls"' - | tr '\n' ',' | sed -e 's/,$//')
[[ $etcd_endpoints =~ ^https:// ]] && break
sleep 3
done
# is our $ETCD_NODENAME already in the etcd cluster ?
# Remove former self first
MY_ID=$(etcdctl member list --endpoints=$etcd_endpoints | grep $ETCD_NODENAME | awk '{print $1}' | sed -e 's/,$//')
[ -n "$MY_ID" ] && retry 12 5 5 etcdctl member remove $MY_ID --endpoints=$etcd_endpoints
# Announce new etcd member and capture ETCD_INITIAL_CLUSTER, retry needed in case another node joining causes temp quorum loss
ETCD_ENVS=$(retry 12 5 5 etcdctl member add $ETCD_NODENAME --peer-urls="https://${ETCD_NODENAME}:2380" --endpoints=$etcd_endpoints)
export $(echo "$ETCD_ENVS" | grep ETCD_INITIAL_CLUSTER= | sed -e 's/"//g')
# Patch kubezero.yaml and re-render to get etcd manifest patched
yq eval -i '.etcd.state = "existing"
| .etcd.initialCluster = strenv(ETCD_INITIAL_CLUSTER)
' ${HOSTFS}/etc/kubernetes/kubezero.yaml
render_kubeadm
# Generate our advanced etcd yaml
_kubeadm init phase etcd local --experimental-patches /tmp/patches
_kubeadm init phase control-plane all --experimental-patches /tmp/patches
_kubeadm init phase mark-control-plane
_kubeadm init phase kubelet-finalize all
else
kubeadm init --config /etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS} \
--experimental-patches /tmp/patches --skip-token-print $LOG
_kubeadm init --experimental-patches /tmp/patches --skip-token-print
fi
cp ${HOSTFS}/etc/kubernetes/admin.conf ${HOSTFS}/root/.kube/config
@ -273,13 +310,7 @@ elif [ "$1" == 'backup' ]; then
restic snapshots || restic init || exit 1
# etcd
export ETCDCTL_API=3
export ETCDCTL_CACERT=${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt
export ETCDCTL_CERT=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.crt
export ETCDCTL_KEY=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.key
etcdctl --endpoints=https://localhost:2379 snapshot save ${WORKDIR}/etcd_snapshot
etcdctl --endpoints=https://${ETCD_NODENAME}:2379 snapshot save ${WORKDIR}/etcd_snapshot
# pki & cluster-admin access
cp -r ${HOSTFS}/etc/kubernetes/pki ${WORKDIR}

View File

@ -2,7 +2,7 @@
set -x
# Allow EFS and EBS Argo apps to be deleted without removing things like storageClasses etc.
# to be replaced by kubezero-storage
# all to be replaced by kubezero-storage
kubectl patch application aws-ebs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
@ -10,6 +10,8 @@ kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op":
# This will NOT affect provisioned volumes
kubectl delete deployment ebs-csi-controller -n kube-system
kubectl delete daemonSet ebs-csi-node -n kube-system
# Snapshot controller was removed from EBS chart
kubectl delete statefulset ebs-snapshot-controller -n kube-system
kubectl delete deployment efs-csi-controller -n kube-system