From 975d166bec133f86725ff7da0246c5b5e545469f Mon Sep 17 00:00:00 2001
From: Stefan Reimer
Date: Mon, 16 May 2022 10:15:41 +0200
Subject: [PATCH] fix: Release 1.22.8, minor upgrade fixes, bump prometheus max. memory

---
 Dockerfile                          |  1 +
 charts/kubezero-metrics/README.md   |  2 +-
 charts/kubezero-metrics/values.yaml |  2 +-
 charts/kubezero/README.md           |  4 ++--
 charts/kubezero/values.yaml         |  4 ++--
 docs/inf1-test.yaml                 | 27 +++++++++++++++++++++++++++
 releases/v1.22/kubezero.sh          | 23 +++++++++++++++++++----
 releases/v1.22/migrate_argo.py      |  2 +-
 8 files changed, 54 insertions(+), 11 deletions(-)
 create mode 100644 docs/inf1-test.yaml

diff --git a/Dockerfile b/Dockerfile
index 0d55b294..6cdbef06 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,6 +16,7 @@ RUN cd /etc/apk/keys && \
     cri-tools@kubezero \
     kubeadm@kubezero~=${KUBE_VERSION} \
     kubectl@kubezero~=${KUBE_VERSION} \
+    etcdhelper@kubezero \
     etcd-ctl@testing \
     restic@testing \
     helm@testing
diff --git a/charts/kubezero-metrics/README.md b/charts/kubezero-metrics/README.md
index 68e7f5d0..cc9b868c 100644
--- a/charts/kubezero-metrics/README.md
+++ b/charts/kubezero-metrics/README.md
@@ -160,7 +160,7 @@ Kubernetes: `>= 1.20.0`
 | kube-prometheus-stack.prometheus.prometheusSpec.logFormat | string | `"json"` | |
 | kube-prometheus-stack.prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues | bool | `false` | |
 | kube-prometheus-stack.prometheus.prometheusSpec.portName | string | `"http-prometheus"` | |
-| kube-prometheus-stack.prometheus.prometheusSpec.resources.limits.memory | string | `"3Gi"` | |
+| kube-prometheus-stack.prometheus.prometheusSpec.resources.limits.memory | string | `"4Gi"` | |
 | kube-prometheus-stack.prometheus.prometheusSpec.resources.requests.cpu | string | `"500m"` | |
 | kube-prometheus-stack.prometheus.prometheusSpec.resources.requests.memory | string | `"512Mi"` | |
 | kube-prometheus-stack.prometheus.prometheusSpec.retention | string | `"8d"` | |
diff --git a/charts/kubezero-metrics/values.yaml b/charts/kubezero-metrics/values.yaml
index 636e0bb2..7bcd277c 100644
--- a/charts/kubezero-metrics/values.yaml
+++ b/charts/kubezero-metrics/values.yaml
@@ -108,7 +108,7 @@ kube-prometheus-stack:
           memory: 512Mi
           cpu: 500m
         limits:
-          memory: 3Gi
+          memory: 4Gi
           # cpu: "1000m"
 
       walCompression: true
diff --git a/charts/kubezero/README.md b/charts/kubezero/README.md
index 0c48399b..c817bebe 100644
--- a/charts/kubezero/README.md
+++ b/charts/kubezero/README.md
@@ -26,14 +26,14 @@ Kubernetes: `>= 1.20.0`
 |-----|------|---------|-------------|
 | HighAvailableControlplane | bool | `false` | |
 | addons.enabled | bool | `false` | |
-| addons.targetRevision | string | `"0.5.2"` | |
+| addons.targetRevision | string | `"0.5.3"` | |
 | argocd.enabled | bool | `false` | |
 | argocd.istio.enabled | bool | `false` | |
 | argocd.namespace | string | `"argocd"` | |
 | argocd.targetRevision | string | `"0.10.1"` | |
 | cert-manager.enabled | bool | `false` | |
 | cert-manager.namespace | string | `"cert-manager"` | |
-| cert-manager.targetRevision | string | `"0.9.0"` | |
+| cert-manager.targetRevision | string | `"0.9.1"` | |
 | istio-ingress.chart | string | `"kubezero-istio-gateway"` | |
 | istio-ingress.enabled | bool | `false` | |
 | istio-ingress.gateway.service | object | `{}` | |
diff --git a/charts/kubezero/values.yaml b/charts/kubezero/values.yaml
index c0ac2580..a04976c7 100644
--- a/charts/kubezero/values.yaml
+++ b/charts/kubezero/values.yaml
@@ -8,7 +8,7 @@ HighAvailableControlplane: false
 
 addons:
   enabled: false
-  targetRevision: 0.5.2
+  targetRevision: 0.5.3
 
 network:
   enabled: false
@@ -18,7 +18,7 @@ network:
 cert-manager:
   enabled: false
   namespace: cert-manager
-  targetRevision: 0.9.0
+  targetRevision: 0.9.1
 
 storage:
   enabled: false
diff --git a/docs/inf1-test.yaml b/docs/inf1-test.yaml
new file mode 100644
index 00000000..8f0a5a97
--- /dev/null
+++ b/docs/inf1-test.yaml
@@ -0,0 +1,27 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  labels:
+    app: neuron-top
+  name: neuron-top
+  namespace: default
+spec:
+  containers:
+  - image: public.ecr.aws/zero-downtime/dumpster:latest
+    command:
+      - "bash"
+      - "-c"
+      - "/opt/aws/neuron/bin/neuron-ls && sleep 3600"
+    imagePullPolicy: IfNotPresent
+    name: neuron-top
+    resources:
+      limits:
+        #hugepages-2Mi: 256Mi
+        aws.amazon.com/neuron: 1
+      requests:
+        memory: 1024Mi
+  tolerations:
+  - effect: NoSchedule
+    key: kubezero-workergroup
+    operator: Equal
+    value: public
diff --git a/releases/v1.22/kubezero.sh b/releases/v1.22/kubezero.sh
index 4be8bc09..762fb0a4 100755
--- a/releases/v1.22/kubezero.sh
+++ b/releases/v1.22/kubezero.sh
@@ -186,7 +186,7 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
 
   render_kubeadm
 
-  # Esnure clean slate if bootstrap, restore PKI otherwise
+  # Ensure clean slate if bootstrap, restore PKI otherwise
   if [[ "$1" =~ "^(bootstrap)$" ]]; then
     rm -rf ${HOSTFS}/var/lib/etcd/member
 
@@ -235,6 +235,11 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
   _kubeadm init phase kubeconfig all
 
   if [[ "$1" =~ "^(join)$" ]]; then
+    # Delete any former self in case forseti has not deleted it yet
+    kubectl delete node ${NODENAME} --wait=true || true
+    # Wait for all pods to be deleted, otherwise stale pods such as kube-proxy linger on the node
+    kubectl delete pods -n kube-system --field-selector spec.nodeName=${NODENAME}
+
     # get current running etcd pods for etcdctl commands
     while true; do
       etcd_endpoints=$(kubectl get pods -n kube-system -l component=etcd -o yaml | \
@@ -273,9 +278,6 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
        | .etcd.initialCluster = strenv(ETCD_INITIAL_CLUSTER)
        ' ${HOSTFS}/etc/kubernetes/kubezero.yaml
     render_kubeadm
-
-    # Delete any former self in case forseti did not delete yet
-    kubectl delete node ${NODENAME} --wait=true || true
   fi
 
   # Generate our custom etcd yaml
@@ -290,6 +292,19 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
   echo "Waiting for Kubernetes API to be online ..."
   retry 0 5 30 kubectl cluster-info --request-timeout 3 >/dev/null
 
+  # Update providerID as the underlying VM changed during restore
+  if [[ "$1" =~ "^(restore)$" ]]; then
+    PROVIDER_ID=$(yq eval '.providerID' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
+    if [ -n "$PROVIDER_ID" ]; then
+      etcdhelper \
+        -cacert ${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt \
+        -cert ${HOSTFS}/etc/kubernetes/pki/etcd/server.crt \
+        -key ${HOSTFS}/etc/kubernetes/pki/etcd/server.key \
+        -endpoint https://${ETCD_NODENAME}:2379 \
+        change-provider-id ${NODENAME} $PROVIDER_ID
+    fi
+  fi
+
   if [[ ! "$1" =~ "^(join)$" ]]; then
     _kubeadm init phase upload-config all
     _kubeadm init phase upload-certs --skip-certificate-key-print
diff --git a/releases/v1.22/migrate_argo.py b/releases/v1.22/migrate_argo.py
index ba8ba12d..9af175b6 100755
--- a/releases/v1.22/migrate_argo.py
+++ b/releases/v1.22/migrate_argo.py
@@ -15,7 +15,7 @@ parser = argparse.ArgumentParser(description="Update Route53 entries")
 parser.add_argument(
     "--version",
     dest="version",
-    default="1.22.8-7",
+    default="1.22.8-8",
     action="store",
     required=False,
     help="Update KubeZero version",
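
Reviewer note, not part of the patch: a minimal smoke test for the two behavioural changes above. It assumes kubectl access to the cluster, a completed restore (so the etcdhelper change-provider-id step has run), and an inf1 worker node where the Neuron test pod can schedule; NODENAME is the node name as used in kubezero.sh.

    # Verify the restored node carries the providerID rewritten via etcdhelper
    kubectl get node ${NODENAME} -o jsonpath='{.spec.providerID}'

    # Schedule the new Neuron test pod; neuron-ls runs before the 'sleep 3600',
    # so its device table should appear in the pod log once the pod is Ready
    kubectl apply -f docs/inf1-test.yaml
    kubectl wait -n default --for=condition=Ready pod/neuron-top --timeout=120s
    kubectl logs -n default neuron-top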