Resource request tuning

This commit is contained in:
Stefan Reimer 2023-12-01 16:56:56 +00:00
parent a9974b17b7
commit 6cfcfe698b
8 changed files with 138 additions and 138 deletions

View File

@ -3,6 +3,9 @@
# Simulate well-known CRDs being available # Simulate well-known CRDs being available
API_VERSIONS="-a monitoring.coreos.com/v1 -a snapshot.storage.k8s.io/v1 -a policy/v1/PodDisruptionBudget" API_VERSIONS="-a monitoring.coreos.com/v1 -a snapshot.storage.k8s.io/v1 -a policy/v1/PodDisruptionBudget"
#VERSION="latest"
VERSION="v1.27"
# Waits for max 300s and retries # Waits for max 300s and retries
function wait_for() { function wait_for() {
local TRIES=0 local TRIES=0
@ -182,3 +185,126 @@ function _helm() {
return 0 return 0
} }
function all_nodes_upgrade() {
CMD="$1"
echo "Deploy all node upgrade daemonSet(busybox)"
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kubezero-all-nodes-upgrade
namespace: kube-system
labels:
app: kubezero-upgrade
spec:
selector:
matchLabels:
name: kubezero-all-nodes-upgrade
template:
metadata:
labels:
name: kubezero-all-nodes-upgrade
spec:
hostNetwork: true
hostIPC: true
hostPID: true
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
initContainers:
- name: node-upgrade
image: busybox
command: ["/bin/sh"]
args: ["-x", "-c", "$CMD" ]
volumeMounts:
- name: host
mountPath: /host
- name: hostproc
mountPath: /hostproc
securityContext:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
containers:
- name: node-upgrade-wait
image: busybox
command: ["sleep", "3600"]
volumes:
- name: host
hostPath:
path: /
type: Directory
- name: hostproc
hostPath:
path: /proc
type: Directory
EOF
kubectl rollout status daemonset -n kube-system kubezero-all-nodes-upgrade --timeout 300s
kubectl delete ds kubezero-all-nodes-upgrade -n kube-system
}
function control_plane_upgrade() {
TASKS="$1"
echo "Deploy cluster admin task: $TASKS"
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: kubezero-upgrade
namespace: kube-system
labels:
app: kubezero-upgrade
spec:
hostNetwork: true
hostIPC: true
hostPID: true
containers:
- name: kubezero-admin
image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
imagePullPolicy: Always
command: ["kubezero.sh"]
args: [$TASKS]
env:
- name: DEBUG
value: "$DEBUG"
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- name: host
mountPath: /host
- name: workdir
mountPath: /tmp
securityContext:
capabilities:
add: ["SYS_CHROOT"]
volumes:
- name: host
hostPath:
path: /
type: Directory
- name: workdir
emptyDir: {}
nodeSelector:
node-role.kubernetes.io/control-plane: ""
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
kubectl logs kubezero-upgrade -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade -n kube-system
}

View File

@ -2,139 +2,13 @@
set -eE set -eE
set -o pipefail set -o pipefail
#VERSION="latest"
VERSION="v1.27"
ARGO_APP=${1:-/tmp/new-kubezero-argoapp.yaml} ARGO_APP=${1:-/tmp/new-kubezero-argoapp.yaml}
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# shellcheck disable=SC1091 # shellcheck disable=SC1091
. "$SCRIPT_DIR"/libhelm.sh
[ -n "$DEBUG" ] && set -x [ -n "$DEBUG" ] && set -x
all_nodes_upgrade() { . "$SCRIPT_DIR"/libhelm.sh
CMD="$1"
echo "Deploy all node upgrade daemonSet(busybox)"
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kubezero-all-nodes-upgrade
namespace: kube-system
labels:
app: kubezero-upgrade
spec:
selector:
matchLabels:
name: kubezero-all-nodes-upgrade
template:
metadata:
labels:
name: kubezero-all-nodes-upgrade
spec:
hostNetwork: true
hostIPC: true
hostPID: true
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
initContainers:
- name: node-upgrade
image: busybox
command: ["/bin/sh"]
args: ["-x", "-c", "$CMD" ]
volumeMounts:
- name: host
mountPath: /host
- name: hostproc
mountPath: /hostproc
securityContext:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
containers:
- name: node-upgrade-wait
image: busybox
command: ["sleep", "3600"]
volumes:
- name: host
hostPath:
path: /
type: Directory
- name: hostproc
hostPath:
path: /proc
type: Directory
EOF
kubectl rollout status daemonset -n kube-system kubezero-all-nodes-upgrade --timeout 300s
kubectl delete ds kubezero-all-nodes-upgrade -n kube-system
}
control_plane_upgrade() {
TASKS="$1"
echo "Deploy cluster admin task: $TASKS"
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: kubezero-upgrade
namespace: kube-system
labels:
app: kubezero-upgrade
spec:
hostNetwork: true
hostIPC: true
hostPID: true
containers:
- name: kubezero-admin
image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
imagePullPolicy: Always
command: ["kubezero.sh"]
args: [$TASKS]
env:
- name: DEBUG
value: "$DEBUG"
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- name: host
mountPath: /host
- name: workdir
mountPath: /tmp
securityContext:
capabilities:
add: ["SYS_CHROOT"]
volumes:
- name: host
hostPath:
path: /
type: Directory
- name: workdir
emptyDir: {}
nodeSelector:
node-role.kubernetes.io/control-plane: ""
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
kubectl logs kubezero-upgrade -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade -n kube-system
}
echo "Checking that all pods in kube-system are running ..." echo "Checking that all pods in kube-system are running ..."
waitSystemPodsRunning waitSystemPodsRunning
@ -154,7 +28,7 @@ control_plane_upgrade kubeadm_upgrade
control_plane_upgrade apply_network control_plane_upgrade apply_network
echo "Wait for all CNI agents to be running ..." echo "Wait for all CNI agents to be running ..."
kubectl rollout status ds/cilium -n kube-system --timeout=120s kubectl rollout status ds/cilium -n kube-system --timeout=300s
all_nodes_upgrade "cd /host/etc/cni/net.d && ln -s 05-cilium.conflist 05-cilium.conf || true" all_nodes_upgrade "cd /host/etc/cni/net.d && ln -s 05-cilium.conflist 05-cilium.conf || true"
# v1.27 # v1.27

View File

@ -3,6 +3,6 @@ spec:
- name: etcd - name: etcd
resources: resources:
requests: requests:
cpu: 200m cpu: 50m
memory: 192Mi memory: 256Mi
#ephemeral-storage: 1Gi #ephemeral-storage: 1Gi

View File

@ -4,5 +4,5 @@ spec:
- name: kube-apiserver - name: kube-apiserver
resources: resources:
requests: requests:
cpu: 200m cpu: 250m
memory: 1Gi memory: 1268Mi

View File

@ -3,5 +3,5 @@ spec:
- name: kube-controller-manager - name: kube-controller-manager
resources: resources:
requests: requests:
cpu: 100m cpu: 50m
memory: 128Mi memory: 192Mi

View File

@ -3,5 +3,5 @@ spec:
- name: kube-scheduler - name: kube-scheduler
resources: resources:
requests: requests:
cpu: 100m cpu: 50m
memory: 64Mi memory: 96Mi

View File

@ -248,7 +248,7 @@ fluent-bit:
resources: resources:
requests: requests:
cpu: 20m cpu: 20m
memory: 32Mi memory: 48Mi
limits: limits:
memory: 128Mi memory: 128Mi

View File

@ -222,7 +222,7 @@ aws-efs-csi-driver:
resources: resources:
requests: requests:
cpu: 20m cpu: 20m
memory: 64Mi memory: 96Mi
limits: limits:
memory: 128Mi memory: 128Mi