feat: new etcd setup, drop kubeadm Join
This commit is contained in:
parent
cd56f0a7aa
commit
7ff4bc9491
@ -7,7 +7,7 @@ KUBE_VERSION := $(shell echo $(VERSION) | sed -e 's/\.[[:digit:]]*$$//')
|
||||
|
||||
.PHONY: build push clean scan
|
||||
|
||||
all: clean build push
|
||||
all: build push
|
||||
|
||||
build:
|
||||
podman build --rm --squash-all --build-arg KUBE_VERSION=$(KUBE_VERSION) --build-arg ALPINE_VERSION=$(ALPINE_VERSION) -t $(TAG) .
|
||||
@ -18,7 +18,7 @@ push:
|
||||
podman push $(REGISTRY)/$(TAG)
|
||||
|
||||
clean:
|
||||
podman rmi -f $(TAG)
|
||||
podman image prune -f
|
||||
|
||||
scan:
|
||||
podman system service&
|
||||
|
@ -25,12 +25,12 @@ Kubernetes: `>= 1.20.0`
|
||||
| addons.clusterBackup.enabled | bool | `false` | |
|
||||
| addons.clusterBackup.passwordFile | string | `""` | /etc/cloudbender/clusterBackup.passphrase |
|
||||
| addons.clusterBackup.repository | string | `""` | s3:https://s3.amazonaws.com/${CFN[ConfigBucket]}/k8s/${CLUSTERNAME}/clusterBackup |
|
||||
| api.allEtcdEndpoints | string | `""` | |
|
||||
| api.apiAudiences | string | `"istio-ca"` | |
|
||||
| api.awsIamAuth.enabled | bool | `false` | |
|
||||
| api.awsIamAuth.kubeAdminRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
|
||||
| api.awsIamAuth.workerNodeRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
|
||||
| api.endpoint | string | `"kube-api.changeme.org:6443"` | |
|
||||
| api.etcdServers | string | `"https://localhost:2379"` | |
|
||||
| api.extraArgs | object | `{}` | |
|
||||
| api.listenPort | int | `6443` | |
|
||||
| api.oidcEndpoint | string | `""` | s3://${CFN[ConfigBucket]}/k8s/$CLUSTERNAME |
|
||||
@ -38,7 +38,8 @@ Kubernetes: `>= 1.20.0`
|
||||
| clusterName | string | `"pleasechangeme"` | |
|
||||
| domain | string | `"changeme.org"` | |
|
||||
| etcd.extraArgs | object | `{}` | |
|
||||
| etcd.nodeName | string | `"set_via_cmdline"` | |
|
||||
| etcd.nodeName | string | `"etcd"` | |
|
||||
| etcd.state | string | `"new"` | |
|
||||
| highAvailable | bool | `false` | |
|
||||
| listenAddress | string | `"0.0.0.0"` | Needs to be set to primary node IP |
|
||||
| network.calico.enabled | bool | `false` | |
|
||||
|
@ -11,22 +11,25 @@ etcd:
|
||||
### DNS discovery
|
||||
#discovery-srv: {{ .Values.domain }}
|
||||
#discovery-srv-name: {{ .Values.clusterName }}
|
||||
#initial-cluster:
|
||||
advertise-client-urls: https://{{ .Values.etcd.nodeName }}:2379
|
||||
initial-advertise-peer-urls: https://{{ .Values.etcd.nodeName }}:2380
|
||||
initial-cluster: {{ include "kubeadm.etcd.initialCluster" .Values.etcd | quote }}
|
||||
initial-cluster-state: {{ .Values.etcd.state }}
|
||||
initial-cluster-token: etcd-{{ .Values.clusterName }}
|
||||
listen-metrics-urls: "http://0.0.0.0:2381"
|
||||
logger: "zap"
|
||||
name: {{ .Values.etcd.nodeName }}
|
||||
listen-peer-urls: https://{{ .Values.listenAddress }}:2380
|
||||
listen-client-urls: https://{{ .Values.listenAddress }}:2379
|
||||
listen-metrics-urls: http://0.0.0.0:2381
|
||||
logger: zap
|
||||
# log-level: "warn"
|
||||
{{- with .Values.etcd.extraArgs }}
|
||||
{{- toYaml . | nindent 6 }}
|
||||
{{- end }}
|
||||
# These will only be used to create the etcd certs but removed for Init/Join kubeadm calls allowing us to sneak in aliases for etcd nodes
|
||||
serverCertSANs:
|
||||
- "{{ .Values.listenAddress }}"
|
||||
- "{{ .Values.etcd.nodeName }}"
|
||||
- "{{ .Values.etcd.nodeName }}.{{ .Values.domain }}"
|
||||
- "{{ .Values.domain }}"
|
||||
peerCertSANs:
|
||||
- "{{ .Values.listenAddress }}"
|
||||
- "{{ .Values.etcd.nodeName }}"
|
||||
- "{{ .Values.etcd.nodeName }}.{{ .Values.domain }}"
|
||||
- "{{ .Values.domain }}"
|
||||
@ -34,20 +37,20 @@ controllerManager:
|
||||
extraArgs:
|
||||
profiling: "false"
|
||||
terminated-pod-gc-threshold: "300"
|
||||
# leader-elect: {{ .Values.highAvailable | quote }}
|
||||
leader-elect: {{ .Values.highAvailable | quote }}
|
||||
logging-format: json
|
||||
feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
|
||||
scheduler:
|
||||
extraArgs:
|
||||
profiling: "false"
|
||||
# leader-elect: {{ .Values.highAvailable | quote }}
|
||||
leader-elect: {{ .Values.highAvailable | quote }}
|
||||
logging-format: json
|
||||
feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
|
||||
apiServer:
|
||||
certSANs:
|
||||
- {{ regexSplit ":" .Values.api.endpoint -1 | first }}
|
||||
extraArgs:
|
||||
etcd-servers: {{ .Values.api.allEtcdEndpoints }}
|
||||
etcd-servers: {{ .Values.api.etcdServers }}
|
||||
profiling: "false"
|
||||
audit-log-path: "/var/log/kubernetes/audit.log"
|
||||
audit-policy-file: /etc/kubernetes/apiserver/audit-policy.yaml
|
||||
|
@ -5,11 +5,17 @@ localAPIEndpoint:
|
||||
bindPort: {{ .Values.api.listenPort }}
|
||||
nodeRegistration:
|
||||
ignorePreflightErrors:
|
||||
- Swap
|
||||
- DirAvailable--var-lib-etcd
|
||||
- DirAvailable--etc-kubernetes-manifests
|
||||
- FileAvailable--etc-kubernetes-pki-ca.crt
|
||||
- FileAvailable--etc-kubernetes-manifests-etcd.yaml
|
||||
- Swap
|
||||
- KubeletVersion
|
||||
kubeletExtraArgs:
|
||||
node-labels: {{ .Values.nodeLabels | quote }}
|
||||
{{- with .Values.providerID }}
|
||||
provider-id: {{ . }}
|
||||
{{- end }}
|
||||
{{- if ne .Values.listenAddress "0.0.0.0" }}
|
||||
node-ip: {{ .Values.listenAddress }}
|
||||
{{- end }}
|
||||
|
@ -1,24 +0,0 @@
|
||||
# This is for controllers only, workers don't use kubeadm
|
||||
apiVersion: kubeadm.k8s.io/v1beta2
|
||||
kind: JoinConfiguration
|
||||
discovery:
|
||||
file:
|
||||
kubeConfigPath: /root/.kube/config
|
||||
controlPlane:
|
||||
localAPIEndpoint:
|
||||
advertiseAddress: {{ .Values.listenAddress }}
|
||||
bindPort: {{ .Values.api.listenPort }}
|
||||
nodeRegistration:
|
||||
ignorePreflightErrors:
|
||||
- DirAvailable--var-lib-etcd
|
||||
- FileAvailable--etc-kubernetes-pki-ca.crt
|
||||
- Swap
|
||||
- KubeletVersion
|
||||
kubeletExtraArgs:
|
||||
node-labels: {{ .Values.nodeLabels | quote }}
|
||||
{{- with .Values.providerID }}
|
||||
provider-id: {{ . }}
|
||||
{{- end }}
|
||||
{{- if ne .Values.listenAddress "0.0.0.0" }}
|
||||
node-ip: {{ .Values.listenAddress }}
|
||||
{{- end }}
|
@ -1,6 +1,4 @@
|
||||
{{- /*
|
||||
Feature gates for all control plane components
|
||||
*/ -}}
|
||||
{{- /* Feature gates for all control plane components */ -}}
|
||||
{{- define "kubeadm.featuregates" -}}
|
||||
{{- $gates := list "CustomCPUCFSQuotaPeriod" "GenericEphemeralVolume" "InTreePluginAWSUnregister" "InTreePluginAzureDiskUnregister" "InTreePluginAzureFileUnregister" "InTreePluginGCEUnregister" "InTreePluginOpenStackUnregister" }}
|
||||
{{- if eq .return "csv" }}
|
||||
@ -13,3 +11,13 @@ Feature gates for all control plane components
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
|
||||
|
||||
{{- /* Etcd default initial cluster */ -}}
|
||||
{{- define "kubeadm.etcd.initialCluster" -}}
|
||||
{{- if .initialCluster -}}
|
||||
{{ .initialCluster }}
|
||||
{{- else -}}
|
||||
{{ .nodeName }}=https://{{ .nodeName }}:2380
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
@ -9,7 +9,7 @@ listenAddress: 0.0.0.0
|
||||
api:
|
||||
endpoint: kube-api.changeme.org:6443
|
||||
listenPort: 6443
|
||||
allEtcdEndpoints: ""
|
||||
etcdServers: "https://localhost:2379"
|
||||
extraArgs: {}
|
||||
# -- https://s3.${REGION}.amazonaws.com/${CFN[ConfigBucket]}/k8s/$CLUSTERNAME
|
||||
serviceAccountIssuer: ""
|
||||
@ -47,7 +47,8 @@ network:
|
||||
highAvailable: false
|
||||
|
||||
etcd:
|
||||
nodeName: set_via_cmdline
|
||||
nodeName: etcd
|
||||
state: new
|
||||
extraArgs: {}
|
||||
|
||||
# -- Set to false for openrc, eg. on Gentoo or Alpine
|
||||
|
@ -7,6 +7,12 @@ VERSION=v1.21
|
||||
|
||||
export KUBECONFIG="${HOSTFS}/root/.kube/config"
|
||||
|
||||
# etcd
|
||||
export ETCDCTL_API=3
|
||||
export ETCDCTL_CACERT=${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt
|
||||
export ETCDCTL_CERT=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.crt
|
||||
export ETCDCTL_KEY=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.key
|
||||
|
||||
if [ -n "$DEBUG" ]; then
|
||||
set -x
|
||||
LOG="--v=5"
|
||||
@ -27,24 +33,22 @@ retry() {
|
||||
}
|
||||
|
||||
|
||||
_kubeadm() {
|
||||
kubeadm $@ --config /etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS} $LOG
|
||||
}
|
||||
|
||||
|
||||
# Render cluster config
|
||||
render_kubeadm() {
|
||||
helm template /opt/kubeadm --output-dir ${WORKDIR} -f ${HOSTFS}/etc/kubernetes/kubezero.yaml
|
||||
|
||||
# Assemble kubeadm config
|
||||
cat /dev/null > ${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml
|
||||
cat /dev/null > ${HOSTFS}/etc/kubernetes/kubeadm.yaml
|
||||
for f in Cluster Init KubeProxy Kubelet; do
|
||||
# echo "---" >> /etc/kubernetes/kubeadm-etcd.yaml
|
||||
cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml
|
||||
# echo "---" >> /etc/kubernetes/kubeadm.yaml
|
||||
cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
|
||||
done
|
||||
|
||||
# Remove etcd custom cert entries from final kubeadm config
|
||||
yq eval 'del(.etcd.local.serverCertSANs) | del(.etcd.local.peerCertSANs)' \
|
||||
${HOSTFS}/etc/kubernetes/kubeadm-etcd.yaml > ${HOSTFS}/etc/kubernetes/kubeadm.yaml
|
||||
|
||||
# Copy JoinConfig
|
||||
cp ${WORKDIR}/kubeadm/templates/JoinConfiguration.yaml ${HOSTFS}/etc/kubernetes
|
||||
|
||||
# hack to "uncloak" the json patches after they get processed by helm
|
||||
for s in apiserver controller-manager scheduler; do
|
||||
yq eval '.json' ${WORKDIR}/kubeadm/templates/patches/kube-${s}1\+json.yaml > /tmp/_tmp.yaml && \
|
||||
@ -58,7 +62,7 @@ parse_kubezero() {
|
||||
|
||||
KUBE_VERSION=$(kubeadm version -o yaml | yq eval .clientVersion.gitVersion -)
|
||||
CLUSTERNAME=$(yq eval '.clusterName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
NODENAME=$(yq eval '.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
|
||||
AWS_IAM_AUTH=$(yq eval '.api.awsIamAuth.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
AWS_NTH=$(yq eval '.addons.aws-node-termination-handler.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
@ -125,8 +129,7 @@ if [ "$1" == 'upgrade' ]; then
|
||||
pre_kubeadm
|
||||
|
||||
# Upgrade
|
||||
kubeadm upgrade apply --config /etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS} \
|
||||
--experimental-patches /tmp/patches $LOG -y
|
||||
_kubeadm upgrade apply -y --experimental-patches /tmp/patches
|
||||
|
||||
post_kubeadm
|
||||
|
||||
@ -187,6 +190,7 @@ elif [[ "$1" == 'node-upgrade' ]]; then
|
||||
# enable backup and awsIamAuth. multus, match other reorg
|
||||
yq -Mi e '.api.awsIamAuth.enabled = "true" | .api.awsIamAuth.workerNodeRole = .workerNodeRole | .api.awsIamAuth.kubeAdminRole = .kubeAdminRole
|
||||
| .api.serviceAccountIssuer = .serviceAccountIssuer | .api.apiAudiences = "istio-ca,sts.amazonaws.com"
|
||||
| .api.etcdServers = .api.allEtcdEndpoints
|
||||
| .network.multus.enabled = "true"
|
||||
| .addons.clusterBackup.enabled = "true" | .addons.clusterBackup.repository = strenv(restic_repo) | .addons.clusterBackup.password = strenv(restic_pw)
|
||||
| .addons.clusterBackup.extraEnv[0].name = "AWS_DEFAULT_REGION" | .addons.clusterBackup.extraEnv[0].value = strenv(REGION)
|
||||
@ -202,33 +206,66 @@ elif [[ "$1" =~ "^(bootstrap|recover|join)$" ]]; then
|
||||
|
||||
# Recert certificates for THIS node
|
||||
rm -f ${HOSTFS}/etc/kubernetes/pki/etcd/peer.* ${HOSTFS}/etc/kubernetes/pki/etcd/server.* ${HOSTFS}/etc/kubernetes/pki/apiserver.*
|
||||
kubeadm init phase certs etcd-server --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
|
||||
kubeadm init phase certs etcd-peer --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
|
||||
kubeadm init phase certs apiserver --config=/etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS}
|
||||
_kubeadm init phase certs etcd-server
|
||||
_kubeadm init phase certs etcd-peer
|
||||
_kubeadm init phase certs apiserver
|
||||
|
||||
# Restore only etcd for disaster recovery
|
||||
if [[ "$1" =~ "^(recover)$" ]]; then
|
||||
etcdctl snapshot restore ${HOSTFS}/etc/kubernetes \
|
||||
--name $NODENAME \
|
||||
etcdctl snapshot restore ${HOSTFS}/etc/kubernetes/etcd_snapshot \
|
||||
--name $ETCD_NODENAME \
|
||||
--data-dir="${HOSTFS}/var/lib/etcd" \
|
||||
--initial-cluster-token ${CLUSTERNAME} \
|
||||
--initial-advertise-peer-urls https://${NODENAME}:2380 \
|
||||
--initial-cluster $NODENAME=https://${NODENAME}:2380
|
||||
--initial-cluster-token etcd-${CLUSTERNAME} \
|
||||
--initial-advertise-peer-urls https://${ETCD_NODENAME}:2380 \
|
||||
--initial-cluster $ETCD_NODENAME=https://${ETCD_NODENAME}:2380
|
||||
fi
|
||||
|
||||
# Create all certs during bootstrap
|
||||
else
|
||||
kubeadm init phase certs all --config=/etc/kubernetes/kubeadm-etcd.yaml --rootfs ${HOSTFS}
|
||||
_kubeadm init phase certs all
|
||||
fi
|
||||
|
||||
pre_kubeadm
|
||||
|
||||
if [[ "$1" =~ "^(join)$" ]]; then
|
||||
kubeadm join --config /etc/kubernetes/JoinConfiguration.yaml --rootfs ${HOSTFS} \
|
||||
--experimental-patches /tmp/patches $LOG
|
||||
|
||||
_kubeadm init phase preflight
|
||||
_kubeadm init phase kubeconfig all
|
||||
_kubeadm init phase kubelet-start
|
||||
|
||||
# first get current running etcd pods for etcdctl commands
|
||||
# retry in case other nodes join / API fails / etcd leader changes etc.
|
||||
while true; do
|
||||
etcd_endpoints=$(kubectl get pods -n kube-system -l component=etcd -o yaml | \
|
||||
yq eval '.items[].metadata.annotations."kubeadm.kubernetes.io/etcd.advertise-client-urls"' - | tr '\n' ',' | sed -e 's/,$//')
|
||||
[[ $etcd_endpoints =~ ^https:// ]] && break
|
||||
sleep 3
|
||||
done
|
||||
|
||||
# Is our $ETCD_NODENAME already in the etcd cluster?
|
||||
# Remove former self first
|
||||
MY_ID=$(etcdctl member list --endpoints=$etcd_endpoints | grep $ETCD_NODENAME | awk '{print $1}' | sed -e 's/,$//')
|
||||
[ -n "$MY_ID" ] && retry 12 5 5 etcdctl member remove $MY_ID --endpoints=$etcd_endpoints
|
||||
|
||||
# Announce new etcd member and capture ETCD_INITIAL_CLUSTER, retry needed in case another node joining causes temp quorum loss
|
||||
ETCD_ENVS=$(retry 12 5 5 etcdctl member add $ETCD_NODENAME --peer-urls="https://${ETCD_NODENAME}:2380" --endpoints=$etcd_endpoints)
|
||||
export $(echo "$ETCD_ENVS" | grep ETCD_INITIAL_CLUSTER= | sed -e 's/"//g')
|
||||
|
||||
# Patch kubezero.yaml and re-render to get etcd manifest patched
|
||||
yq eval -i '.etcd.state = "existing"
|
||||
| .etcd.initialCluster = strenv(ETCD_INITIAL_CLUSTER)
|
||||
' ${HOSTFS}/etc/kubernetes/kubezero.yaml
|
||||
render_kubeadm
|
||||
|
||||
# Generate our advanced etcd yaml
|
||||
_kubeadm init phase etcd local --experimental-patches /tmp/patches
|
||||
|
||||
_kubeadm init phase control-plane all --experimental-patches /tmp/patches
|
||||
_kubeadm init phase mark-control-plane
|
||||
_kubeadm init phase kubelet-finalize all
|
||||
|
||||
else
|
||||
kubeadm init --config /etc/kubernetes/kubeadm.yaml --rootfs ${HOSTFS} \
|
||||
--experimental-patches /tmp/patches --skip-token-print $LOG
|
||||
_kubeadm init --experimental-patches /tmp/patches --skip-token-print
|
||||
fi
|
||||
|
||||
cp ${HOSTFS}/etc/kubernetes/admin.conf ${HOSTFS}/root/.kube/config
|
||||
@ -273,13 +310,7 @@ elif [ "$1" == 'backup' ]; then
|
||||
|
||||
restic snapshots || restic init || exit 1
|
||||
|
||||
# etcd
|
||||
export ETCDCTL_API=3
|
||||
export ETCDCTL_CACERT=${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt
|
||||
export ETCDCTL_CERT=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.crt
|
||||
export ETCDCTL_KEY=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.key
|
||||
|
||||
etcdctl --endpoints=https://localhost:2379 snapshot save ${WORKDIR}/etcd_snapshot
|
||||
etcdctl --endpoints=https://${ETCD_NODENAME}:2379 snapshot save ${WORKDIR}/etcd_snapshot
|
||||
|
||||
# pki & cluster-admin access
|
||||
cp -r ${HOSTFS}/etc/kubernetes/pki ${WORKDIR}
|
||||
|
@ -2,7 +2,7 @@
|
||||
set -x
|
||||
|
||||
# Allow EFS and EBS Argo apps to be deleted without removing things like storageClasses etc.
|
||||
# to be replaced by kubezero-storage
|
||||
# all to be replaced by kubezero-storage
|
||||
kubectl patch application aws-ebs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
|
||||
kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
|
||||
|
||||
@ -10,6 +10,8 @@ kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op":
|
||||
# This will NOT affect provisioned volumes
|
||||
kubectl delete deployment ebs-csi-controller -n kube-system
|
||||
kubectl delete daemonSet ebs-csi-node -n kube-system
|
||||
|
||||
# Snapshot controller was removed from EBS chart
|
||||
kubectl delete statefulset ebs-snapshot-controller -n kube-system
|
||||
|
||||
kubectl delete deployment efs-csi-controller -n kube-system
|
||||
|
Loading…
Reference in New Issue
Block a user