feat: improved admin debug_shell, add etcd defrag to backup
This commit is contained in:
parent
7bade805ee
commit
c58feb9882
@ -5,7 +5,7 @@ FROM alpine:${ALPINE_VERSION}
|
||||
ARG KUBE_VERSION
|
||||
|
||||
RUN cd /etc/apk/keys && \
|
||||
wget "https://cdn.zero-downtime.net/alpine/stefan@zero-downtime.net-6183d596.rsa.pub" && \
|
||||
wget "https://cdn.zero-downtime.net/alpine/stefan@zero-downtime.net-61bb6bfb.rsa.pub" && \
|
||||
echo "@kubezero https://cdn.zero-downtime.net/alpine/v${KUBE_VERSION}/kubezero" >> /etc/apk/repositories && \
|
||||
echo "@testing http://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories && \
|
||||
apk upgrade -U -a && \
|
||||
|
@ -1,4 +1,4 @@
|
||||
VERSION ?= 1.21.7
|
||||
VERSION ?= 1.21.8
|
||||
ALPINE_VERSION ?= 3.15
|
||||
REGISTRY := public.ecr.aws/zero-downtime
|
||||
REPOSITORY := kubezero-admin
|
||||
|
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubeadm
|
||||
description: KubeZero Kubeadm cluster config
|
||||
type: application
|
||||
version: 1.21.7
|
||||
version: 1.21.8
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
|
@ -1,6 +1,6 @@
|
||||
# kubeadm
|
||||
|
||||
![Version: 1.21.7](https://img.shields.io/badge/Version-1.21.7-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
![Version: 1.21.8](https://img.shields.io/badge/Version-1.21.8-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
|
||||
KubeZero Kubeadm cluster config
|
||||
|
||||
@ -30,7 +30,7 @@ Kubernetes: `>= 1.20.0`
|
||||
| api.awsIamAuth.kubeAdminRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
|
||||
| api.awsIamAuth.workerNodeRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
|
||||
| api.endpoint | string | `"kube-api.changeme.org:6443"` | |
|
||||
| api.etcdServers | string | `"https://localhost:2379"` | |
|
||||
| api.etcdServers | string | `"https://etcd:2379"` | |
|
||||
| api.extraArgs | object | `{}` | |
|
||||
| api.listenPort | int | `6443` | |
|
||||
| api.oidcEndpoint | string | `""` | s3://${CFN[ConfigBucket]}/k8s/$CLUSTERNAME |
|
||||
@ -46,7 +46,7 @@ Kubernetes: `>= 1.20.0`
|
||||
| network.cilium.enabled | bool | `false` | |
|
||||
| network.multus.enabled | bool | `false` | |
|
||||
| network.multus.tag | string | `"v3.8"` | |
|
||||
| nodeName | string | `"localhost"` | set to $HOSTNAME |
|
||||
| nodeName | string | `"kubezero-node"` | set to $HOSTNAME |
|
||||
| protectKernelDefaults | bool | `true` | |
|
||||
| systemd | bool | `true` | Set to false for openrc, eg. on Gentoo or Alpine |
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
clusterName: pleasechangeme
|
||||
# -- set to $HOSTNAME
|
||||
nodeName: localhost
|
||||
nodeName: kubezero-node
|
||||
domain: changeme.org
|
||||
|
||||
# -- Needs to be set to primary node IP
|
||||
@ -9,7 +9,7 @@ listenAddress: 0.0.0.0
|
||||
api:
|
||||
endpoint: kube-api.changeme.org:6443
|
||||
listenPort: 6443
|
||||
etcdServers: "https://localhost:2379"
|
||||
etcdServers: "https://etcd:2379"
|
||||
extraArgs: {}
|
||||
# -- https://s3.${REGION}.amazonaws.com/${CFN[ConfigBucket]}/k8s/$CLUSTERNAME
|
||||
serviceAccountIssuer: ""
|
||||
|
@ -1,9 +1,15 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
WORKDIR=/tmp/kubezero
|
||||
HOSTFS=/host
|
||||
VERSION=v1.21
|
||||
if [ -n "$DEBUG" ]; then
|
||||
set -x
|
||||
LOG="--v=5"
|
||||
fi
|
||||
|
||||
# Export vars to ease use in debug_shell etc
|
||||
export WORKDIR=/tmp/kubezero
|
||||
export HOSTFS=/host
|
||||
export VERSION=v1.21
|
||||
|
||||
export KUBECONFIG="${HOSTFS}/root/.kube/config"
|
||||
|
||||
@ -13,11 +19,6 @@ export ETCDCTL_CACERT=${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt
|
||||
export ETCDCTL_CERT=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.crt
|
||||
export ETCDCTL_KEY=${HOSTFS}/etc/kubernetes/pki/apiserver-etcd-client.key
|
||||
|
||||
if [ -n "$DEBUG" ]; then
|
||||
set -x
|
||||
LOG="--v=5"
|
||||
fi
|
||||
|
||||
# Generic retry utility
|
||||
retry() {
|
||||
local tries=$1
|
||||
@ -60,12 +61,12 @@ render_kubeadm() {
|
||||
parse_kubezero() {
|
||||
[ -f ${HOSTFS}/etc/kubernetes/kubezero.yaml ] || { echo "Missing /etc/kubernetes/kubezero.yaml!"; exit 1; }
|
||||
|
||||
KUBE_VERSION=$(kubeadm version -o yaml | yq eval .clientVersion.gitVersion -)
|
||||
CLUSTERNAME=$(yq eval '.clusterName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
export KUBE_VERSION=$(kubeadm version -o yaml | yq eval .clientVersion.gitVersion -)
|
||||
export CLUSTERNAME=$(yq eval '.clusterName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
export ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
|
||||
AWS_IAM_AUTH=$(yq eval '.api.awsIamAuth.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
AWS_NTH=$(yq eval '.addons.aws-node-termination-handler.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
export AWS_IAM_AUTH=$(yq eval '.api.awsIamAuth.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
export AWS_NTH=$(yq eval '.addons.aws-node-termination-handler.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
|
||||
}
|
||||
|
||||
|
||||
@ -173,28 +174,35 @@ if [ "$1" == 'upgrade' ]; then
|
||||
# - update oidc: do we need that?
|
||||
|
||||
elif [[ "$1" == 'node-upgrade' ]]; then
|
||||
|
||||
echo "Starting node upgrade ..."
|
||||
|
||||
if [ -f ${HOSTFS}/usr/local/sbin/backup_control_plane.sh ]; then
|
||||
mv ${HOSTFS}/usr/local/sbin/backup_control_plane.sh ${HOSTFS}/usr/local/sbin/backup_control_plane.disabled
|
||||
echo "Disabled old cluster backup OS cronjob"
|
||||
fi
|
||||
|
||||
echo "Migrating kubezero.yaml"
|
||||
yq -i eval '.api.etcdServers = .api.allEtcdEndpoints | .network.multus.enabled = "true"' ${HOSTFS}/etc/kubernetes/kubezero.yaml
|
||||
|
||||
export restic_repo=$(grep "export RESTIC_REPOSITORY" ${HOSTFS}/usr/local/sbin/backup_control_plane.disabled | sed -e 's/.*=//' | sed -e 's/"//g')
|
||||
export restic_pw="$(cat ${HOSTFS}/etc/cloudbender/clusterBackup.passphrase)"
|
||||
export REGION=$(kubectl get node $NODE_NAME -o yaml | yq eval '.metadata.labels."topology.kubernetes.io/region"' -)
|
||||
# AWS
|
||||
if [ -f ${HOSTFS}/etc/cloudbender/clusterBackup.passphrase ]; then
|
||||
if [ -f ${HOSTFS}/usr/local/sbin/backup_control_plane.sh ]; then
|
||||
mv ${HOSTFS}/usr/local/sbin/backup_control_plane.sh ${HOSTFS}/usr/local/sbin/backup_control_plane.disabled
|
||||
echo "Disabled old cluster backup OS cronjob"
|
||||
fi
|
||||
|
||||
# enable backup and awsIamAuth. multus, match other reorg
|
||||
yq -Mi e '.api.awsIamAuth.enabled = "true" | .api.awsIamAuth.workerNodeRole = .workerNodeRole | .api.awsIamAuth.kubeAdminRole = .kubeAdminRole
|
||||
| .api.serviceAccountIssuer = .serviceAccountIssuer | .api.apiAudiences = "istio-ca,sts.amazonaws.com"
|
||||
| .api.etcdServers = .api.allEtcdEndpoints
|
||||
| .network.multus.enabled = "true"
|
||||
| .addons.clusterBackup.enabled = "true" | .addons.clusterBackup.repository = strenv(restic_repo) | .addons.clusterBackup.password = strenv(restic_pw)
|
||||
| .addons.clusterBackup.extraEnv[0].name = "AWS_DEFAULT_REGION" | .addons.clusterBackup.extraEnv[0].value = strenv(REGION)
|
||||
' ${HOSTFS}/etc/kubernetes/kubezero.yaml
|
||||
# enable backup and awsIamAuth & multus
|
||||
yq -i eval '
|
||||
.api.awsIamAuth.enabled = "true" | .api.awsIamAuth.workerNodeRole = .workerNodeRole | .api.awsIamAuth.kubeAdminRole = .kubeAdminRole
|
||||
| .api.serviceAccountIssuer = .serviceAccountIssuer | .api.apiAudiences = "istio-ca,sts.amazonaws.com"
|
||||
' ${HOSTFS}/etc/kubernetes/kubezero.yaml
|
||||
|
||||
export restic_repo=$(grep "export RESTIC_REPOSITORY" ${HOSTFS}/usr/local/sbin/backup_control_plane.disabled | sed -e 's/.*=//' | sed -e 's/"//g')
|
||||
export restic_pw="$(cat ${HOSTFS}/etc/cloudbender/clusterBackup.passphrase)"
|
||||
export REGION=$(kubectl get node $NODE_NAME -o yaml | yq eval '.metadata.labels."topology.kubernetes.io/region"' -)
|
||||
|
||||
if [ -n "$restic_repo" ]; then
|
||||
yq -i eval '
|
||||
.addons.clusterBackup.enabled = "true" | .addons.clusterBackup.repository = strenv(restic_repo) | .addons.clusterBackup.password = strenv(restic_pw)
|
||||
| .addons.clusterBackup.extraEnv[0].name = "AWS_DEFAULT_REGION" | .addons.clusterBackup.extraEnv[0].value = strenv(REGION)
|
||||
' ${HOSTFS}/etc/kubernetes/kubezero.yaml
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "All done."
|
||||
|
||||
@ -284,7 +292,7 @@ elif [[ "$1" =~ "^(bootstrap|recover|join)$" ]]; then
|
||||
yq eval -M ".clusters[0].cluster.certificate-authority-data = \"$(cat ${HOSTFS}/etc/kubernetes/pki/ca.crt | base64 -w0)\"" ${WORKDIR}/kubeadm/templates/admin-aws-iam.yaml > ${HOSTFS}/etc/kubernetes/admin-aws-iam.yaml
|
||||
fi
|
||||
|
||||
# Install some basics on bootstrap and join for 1.21.7 to get new modules in place
|
||||
# Install some basics on bootstrap, join and recover for 1.21 to get new modules in place
|
||||
if [[ "$1" =~ "^(bootstrap|join|recover)$" ]]; then
|
||||
helm repo add kubezero https://cdn.zero-downtime.net/charts/
|
||||
|
||||
@ -322,11 +330,15 @@ elif [ "$1" == 'backup' ]; then
|
||||
|
||||
echo "Backup complete"
|
||||
|
||||
# Remove all previous
|
||||
# Remove backups from previous versions
|
||||
restic forget --keep-tag $VERSION --prune
|
||||
|
||||
# Regular retention
|
||||
restic forget --keep-hourly 24 --keep-daily ${RESTIC_RETENTION:-7} --prune
|
||||
|
||||
# Defrag etcd backend
|
||||
etcdctl --endpoints=https://${ETCD_NODENAME}:2379 defrag
|
||||
|
||||
|
||||
elif [ "$1" == 'restore' ]; then
|
||||
mkdir -p ${WORKDIR}
|
||||
@ -342,6 +354,10 @@ elif [ "$1" == 'restore' ]; then
|
||||
# Always use the kubeadm kubectl config to avoid a chicken-and-egg problem with custom auth hooks
|
||||
cp ${WORKDIR}/admin.conf ${HOSTFS}/root/.kube/config
|
||||
|
||||
elif [ "$1" == 'debug_shell' ]; then
|
||||
echo "Entering debug shell"
|
||||
/bin/sh
|
||||
|
||||
else
|
||||
echo "Unknown command!"
|
||||
exit 1
|
||||
|
@ -1,5 +1,8 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
#!/bin/bash -e
|
||||
|
||||
VERSION="v1.21.8"
|
||||
|
||||
[ -n "$DEBUG" ] && DEBUG=1
|
||||
|
||||
# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
|
||||
unset AWS_DEFAULT_PROFILE
|
||||
@ -9,11 +12,11 @@ nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o json | jq
|
||||
for node in $nodes; do
|
||||
echo "Deploying node upgrade job on $node..."
|
||||
|
||||
cat <<'EOF' | sed -e "s/__node__/$node/g" | kubectl apply -f -
|
||||
cat <<EOF | sed -e "s/__node__/$node/g" | kubectl apply -f -
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: kubezero-upgrade-node-__node__
|
||||
name: kubezero-upgrade-${VERSION//.}-node-__node__
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app: kubezero-upgrade-node
|
||||
@ -21,14 +24,14 @@ spec:
|
||||
hostNetwork: true
|
||||
containers:
|
||||
- name: kubezero-admin
|
||||
image: public.ecr.aws/zero-downtime/kubezero-admin:v1.21.7
|
||||
image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
|
||||
imagePullPolicy: Always
|
||||
command: ["kubezero.sh"]
|
||||
args:
|
||||
- node-upgrade
|
||||
env:
|
||||
- name: DEBUG
|
||||
value: "1"
|
||||
value: "$DEBUG"
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
@ -55,21 +58,21 @@ spec:
|
||||
effect: NoSchedule
|
||||
restartPolicy: Never
|
||||
EOF
|
||||
kubectl wait pod kubezero-upgrade-node-$node -n kube-system --timeout 120s --for=condition=initialized
|
||||
kubectl wait pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
|
||||
while true; do
|
||||
kubectl logs kubezero-upgrade-node-$node -n kube-system -f && break
|
||||
kubectl logs kubezero-upgrade-${VERSION//.}-node-$node -n kube-system -f 2>/dev/null && break
|
||||
sleep 3
|
||||
done
|
||||
kubectl delete pod kubezero-upgrade-node-$node -n kube-system
|
||||
kubectl delete pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system
|
||||
done
|
||||
|
||||
echo "Deploying cluster upgrade job ..."
|
||||
|
||||
cat <<'EOF' | kubectl apply -f -
|
||||
cat <<EOF | kubectl apply -f -
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: kubezero-upgrade-v121
|
||||
name: kubezero-upgrade-${VERSION//.}
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app: kubezero-upgrade
|
||||
@ -79,14 +82,14 @@ spec:
|
||||
#hostPID: true
|
||||
containers:
|
||||
- name: kubezero-admin
|
||||
image: public.ecr.aws/zero-downtime/kubezero-admin:v1.21.7
|
||||
image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
|
||||
imagePullPolicy: Always
|
||||
command: ["kubezero.sh"]
|
||||
args:
|
||||
- upgrade
|
||||
env:
|
||||
- name: DEBUG
|
||||
value: "1"
|
||||
value: "$DEBUG"
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
@ -107,15 +110,16 @@ spec:
|
||||
- name: workdir
|
||||
emptyDir: {}
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/master: ""
|
||||
node-role.kubernetes.io/control-plane: ""
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
effect: NoSchedule
|
||||
restartPolicy: Never
|
||||
EOF
|
||||
|
||||
kubectl wait pod kubezero-upgrade-v121 -n kube-system --timeout 120s --for=condition=initialized
|
||||
kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
|
||||
while true; do
|
||||
kubectl logs kubezero-upgrade-v121 -n kube-system -f && break
|
||||
kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
|
||||
sleep 3
|
||||
done
|
||||
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system
|
||||
|
Loading…
Reference in New Issue
Block a user