Resource request tuning

Stefan Reimer 2023-12-01 16:56:56 +00:00
parent 8a7ff1f8a1
commit b2cf56367d
8 changed files with 138 additions and 138 deletions

View File

@@ -3,6 +3,9 @@
# Simulate well-known CRDs being available
API_VERSIONS="-a monitoring.coreos.com/v1 -a snapshot.storage.k8s.io/v1 -a policy/v1/PodDisruptionBudget"
#VERSION="latest"
VERSION="v1.27"
# Waits for max 300s and retries
function wait_for() {
  local TRIES=0
@@ -182,3 +185,126 @@ function _helm() {
  return 0
}
function all_nodes_upgrade() {
  CMD="$1"
  echo "Deploy all node upgrade daemonSet(busybox)"
  cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kubezero-all-nodes-upgrade
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  selector:
    matchLabels:
      name: kubezero-all-nodes-upgrade
  template:
    metadata:
      labels:
        name: kubezero-all-nodes-upgrade
    spec:
      hostNetwork: true
      hostIPC: true
      hostPID: true
      tolerations:
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
      initContainers:
      - name: node-upgrade
        image: busybox
        command: ["/bin/sh"]
        args: ["-x", "-c", "$CMD" ]
        volumeMounts:
        - name: host
          mountPath: /host
        - name: hostproc
          mountPath: /hostproc
        securityContext:
          privileged: true
          capabilities:
            add: ["SYS_ADMIN"]
      containers:
      - name: node-upgrade-wait
        image: busybox
        command: ["sleep", "3600"]
      volumes:
      - name: host
        hostPath:
          path: /
          type: Directory
      - name: hostproc
        hostPath:
          path: /proc
          type: Directory
EOF
  kubectl rollout status daemonset -n kube-system kubezero-all-nodes-upgrade --timeout 300s
  kubectl delete ds kubezero-all-nodes-upgrade -n kube-system
}
function control_plane_upgrade() {
  TASKS="$1"
  echo "Deploy cluster admin task: $TASKS"
  cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  hostIPC: true
  hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args: [$TASKS]
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/control-plane
    operator: Exists
    effect: NoSchedule
  restartPolicy: Never
EOF
  kubectl wait pod kubezero-upgrade -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
  while true; do
    kubectl logs kubezero-upgrade -n kube-system -f 2>/dev/null && break
    sleep 3
  done
  kubectl delete pod kubezero-upgrade -n kube-system
}

View File

@@ -2,139 +2,13 @@
set -eE
set -o pipefail
#VERSION="latest"
VERSION="v1.27"
ARGO_APP=${1:-/tmp/new-kubezero-argoapp.yaml}
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# shellcheck disable=SC1091
. "$SCRIPT_DIR"/libhelm.sh
[ -n "$DEBUG" ] && set -x
all_nodes_upgrade() {
  CMD="$1"
  echo "Deploy all node upgrade daemonSet(busybox)"
  cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kubezero-all-nodes-upgrade
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  selector:
    matchLabels:
      name: kubezero-all-nodes-upgrade
  template:
    metadata:
      labels:
        name: kubezero-all-nodes-upgrade
    spec:
      hostNetwork: true
      hostIPC: true
      hostPID: true
      tolerations:
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
      initContainers:
      - name: node-upgrade
        image: busybox
        command: ["/bin/sh"]
        args: ["-x", "-c", "$CMD" ]
        volumeMounts:
        - name: host
          mountPath: /host
        - name: hostproc
          mountPath: /hostproc
        securityContext:
          privileged: true
          capabilities:
            add: ["SYS_ADMIN"]
      containers:
      - name: node-upgrade-wait
        image: busybox
        command: ["sleep", "3600"]
      volumes:
      - name: host
        hostPath:
          path: /
          type: Directory
      - name: hostproc
        hostPath:
          path: /proc
          type: Directory
EOF
  kubectl rollout status daemonset -n kube-system kubezero-all-nodes-upgrade --timeout 300s
  kubectl delete ds kubezero-all-nodes-upgrade -n kube-system
}
control_plane_upgrade() {
  TASKS="$1"
  echo "Deploy cluster admin task: $TASKS"
  cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  hostIPC: true
  hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args: [$TASKS]
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/control-plane
    operator: Exists
    effect: NoSchedule
  restartPolicy: Never
EOF
  kubectl wait pod kubezero-upgrade -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
  while true; do
    kubectl logs kubezero-upgrade -n kube-system -f 2>/dev/null && break
    sleep 3
  done
  kubectl delete pod kubezero-upgrade -n kube-system
}
. "$SCRIPT_DIR"/libhelm.sh
echo "Checking that all pods in kube-system are running ..."
waitSystemPodsRunning
@@ -154,7 +28,7 @@ control_plane_upgrade kubeadm_upgrade
control_plane_upgrade apply_network
echo "Wait for all CNI agents to be running ..."
kubectl rollout status ds/cilium -n kube-system --timeout=120s
kubectl rollout status ds/cilium -n kube-system --timeout=300s
all_nodes_upgrade "cd /host/etc/cni/net.d && ln -s 05-cilium.conflist 05-cilium.conf || true"
# v1.27

View File

@@ -3,6 +3,6 @@ spec:
  - name: etcd
    resources:
      requests:
        cpu: 200m
        memory: 192Mi
        cpu: 50m
        memory: 256Mi
        #ephemeral-storage: 1Gi

View File

@@ -4,5 +4,5 @@ spec:
  - name: kube-apiserver
    resources:
      requests:
        cpu: 200m
        memory: 1Gi
        cpu: 250m
        memory: 1268Mi

View File

@@ -3,5 +3,5 @@ spec:
  - name: kube-controller-manager
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
        cpu: 50m
        memory: 192Mi

View File

@@ -3,5 +3,5 @@ spec:
  - name: kube-scheduler
    resources:
      requests:
        cpu: 100m
        memory: 64Mi
        cpu: 50m
        memory: 96Mi

View File

@@ -248,7 +248,7 @@ fluent-bit:
  resources:
    requests:
      cpu: 20m
      memory: 32Mi
      memory: 48Mi
    limits:
      memory: 128Mi

View File

@@ -222,7 +222,7 @@ aws-efs-csi-driver:
  resources:
    requests:
      cpu: 20m
      memory: 64Mi
      memory: 96Mi
    limits:
      memory: 128Mi