From 760d173a1904589bca343adb52ede599a97dcffa Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Tue, 20 Apr 2021 12:49:29 +0200 Subject: [PATCH] feat: improved update strategy and timing to reduce 5XX during istio ingress deployments --- charts/kubezero-istio-ingress/Chart.yaml | 2 +- .../bootstrap-config.yaml | 48 +++++++++++++++++++ .../istio-ingress/templates/deployment.yaml | 9 +++- .../templates/deployment.yaml | 9 +++- charts/kubezero-istio-ingress/values.yaml | 22 ++++++--- charts/kubezero-istio/.gitignore | 2 +- charts/kubezero-istio/Chart.yaml | 2 +- .../ingress-terminationgraceperiod.patch | 12 ----- charts/kubezero-istio/istio-discovery.patch | 16 ------- charts/kubezero-istio/update.sh | 41 ++++++++-------- charts/kubezero-istio/values.yaml | 6 +-- charts/kubezero-istio/zdt.patch | 47 ++++++++++++++++++ 12 files changed, 150 insertions(+), 66 deletions(-) create mode 100644 charts/kubezero-istio-ingress/bootstrap-config.yaml delete mode 100644 charts/kubezero-istio/ingress-terminationgraceperiod.patch delete mode 100644 charts/kubezero-istio/istio-discovery.patch create mode 100644 charts/kubezero-istio/zdt.patch diff --git a/charts/kubezero-istio-ingress/Chart.yaml b/charts/kubezero-istio-ingress/Chart.yaml index a2d0f39..a478d3a 100644 --- a/charts/kubezero-istio-ingress/Chart.yaml +++ b/charts/kubezero-istio-ingress/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-istio-ingress description: KubeZero Umbrella Chart for Istio based Ingress type: application -version: 0.5.2 +version: 0.5.4 appVersion: 1.9.3 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png diff --git a/charts/kubezero-istio-ingress/bootstrap-config.yaml b/charts/kubezero-istio-ingress/bootstrap-config.yaml new file mode 100644 index 0000000..fb39ddc --- /dev/null +++ b/charts/kubezero-istio-ingress/bootstrap-config.yaml @@ -0,0 +1,48 @@ +# 
https://www.envoyproxy.io/docs/envoy/v1.17.1/configuration/best_practices/edge#configuring-envoy-as-an-edge-proxy +# https://github.com/istio/istio/issues/24715 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: istio-gateway-bootstrap-config + namespace: {{ .Release.Namespace }} +data: + custom_bootstrap.json: | + { + "overload_manager": { + "actions": [ + { + "name": "envoy.overload_actions.shrink_heap", + "triggers": [ + { + "name": "envoy.resource_monitors.fixed_heap", + "threshold": { + "value": 0.9 + } + } + ] + }, + { + "name": "envoy.overload_actions.stop_accepting_requests", + "triggers": [ + { + "name": "envoy.resource_monitors.fixed_heap", + "threshold": { + "value": 0.99 + } + } + ] + } + ], + "refresh_interval": "0.25s", + "resource_monitors": [ + { + "name": "envoy.resource_monitors.fixed_heap", + "typed_config": { + "@type": "type.googleapis.com/envoy.extensions.resource_monitors.fixed_heap.v3.FixedHeapConfig", + "max_heap_size_bytes": 536870912 + } + } + ] + } + } diff --git a/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml b/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml index 70af55a..0c10978 100644 --- a/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml +++ b/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml @@ -17,6 +17,8 @@ spec: {{- if $gateway.replicaCount }} replicas: {{ $gateway.replicaCount }} {{- end }} + # Give the LB 120s to detect and take into service, should only be 40s but we are on AWS so ?? 
+ minReadySeconds: 120 {{- end }} selector: matchLabels: @@ -69,7 +71,7 @@ spec: {{- if .Values.global.priorityClassName }} priorityClassName: "{{ .Values.global.priorityClassName }}" {{- end }} - terminationGracePeriodSeconds: 90 + terminationGracePeriodSeconds: 120 {{- if .Values.global.proxy.enableCoreDump }} initContainers: - name: enable-core-dump @@ -141,6 +143,11 @@ spec: privileged: false readOnlyRootFilesystem: true {{- end }} + #This needs kube-proxy support coming with 1.22 hopefully, cilium ? + #lifecycle: + # preStop: + # exec: + # command: ["/bin/sh","-c","sleep 30"] readinessProbe: failureThreshold: 30 httpGet: diff --git a/charts/kubezero-istio-ingress/charts/istio-private-ingress/templates/deployment.yaml b/charts/kubezero-istio-ingress/charts/istio-private-ingress/templates/deployment.yaml index 70af55a..0c10978 100644 --- a/charts/kubezero-istio-ingress/charts/istio-private-ingress/templates/deployment.yaml +++ b/charts/kubezero-istio-ingress/charts/istio-private-ingress/templates/deployment.yaml @@ -17,6 +17,8 @@ spec: {{- if $gateway.replicaCount }} replicas: {{ $gateway.replicaCount }} {{- end }} + # Give the LB 120s to detect and take into service, should only be 40s but we are on AWS so ?? + minReadySeconds: 120 {{- end }} selector: matchLabels: @@ -69,7 +71,7 @@ spec: {{- if .Values.global.priorityClassName }} priorityClassName: "{{ .Values.global.priorityClassName }}" {{- end }} - terminationGracePeriodSeconds: 90 + terminationGracePeriodSeconds: 120 {{- if .Values.global.proxy.enableCoreDump }} initContainers: - name: enable-core-dump @@ -141,6 +143,11 @@ spec: privileged: false readOnlyRootFilesystem: true {{- end }} + #This needs kube-proxy support coming with 1.22 hopefully, cilium ? 
+ #lifecycle: + # preStop: + # exec: + # command: ["/bin/sh","-c","sleep 30"] readinessProbe: failureThreshold: 30 httpGet: diff --git a/charts/kubezero-istio-ingress/values.yaml b/charts/kubezero-istio-ingress/values.yaml index c26f8a6..79642a1 100644 --- a/charts/kubezero-istio-ingress/values.yaml +++ b/charts/kubezero-istio-ingress/values.yaml @@ -21,6 +21,8 @@ istio-ingress: istio-ingressgateway: autoscaleEnabled: false replicaCount: 1 + rollingMaxSurge: 1 + rollingMaxUnavailable: 0 resources: requests: cpu: 50m @@ -35,9 +37,8 @@ istio-ingress: topologyKey: kubernetes.io/hostname values: istio-ingressgateway type: NodePort - env: - TERMINATION_DRAIN_DURATION_SECONDS: '"60"' - # ISTIO_META_HTTP10: '"1"' + #podAnnotations: + # sidecar.istio.io/bootstrapOverride: istio-gateway-bootstrap-config # The node selector is normally the list of nodeports, see CloudBender nodeSelector: node.kubernetes.io/ingress.public: "30080_30443" @@ -72,6 +73,11 @@ istio-ingress: dnsNames: [] # - '*.example.com' + meshConfig: + defaultConfig: + proxyMetadata: + # ISTIO_META_HTTP10: 1 + istio-private-ingress: enabled: false @@ -85,6 +91,8 @@ istio-private-ingress: autoscaleEnabled: false replicaCount: 1 + rollingMaxSurge: 1 + rollingMaxUnavailable: 0 resources: requests: cpu: 50m @@ -99,13 +107,9 @@ istio-private-ingress: topologyKey: kubernetes.io/hostname values: istio-private-ingressgateway type: NodePort - env: - TERMINATION_DRAIN_DURATION_SECONDS: '"60"' - # ISTIO_META_HTTP10: '"1"' nodeSelector: node.kubernetes.io/ingress.private: "31080_31443" #nodeSelector: "31080_31443_31671_31672_31224" - ports: - name: status-port port: 15021 @@ -149,3 +153,7 @@ istio-private-ingress: dnsNames: [] # - '*.example.com' + meshConfig: + defaultConfig: + proxyMetadata: + # ISTIO_META_HTTP10: 1 diff --git a/charts/kubezero-istio/.gitignore b/charts/kubezero-istio/.gitignore index b4a6b6b..3c48b60 100644 --- a/charts/kubezero-istio/.gitignore +++ b/charts/kubezero-istio/.gitignore @@ -1,2 +1,2 @@ 
istioctl -istio-?.?.? +istio diff --git a/charts/kubezero-istio/Chart.yaml b/charts/kubezero-istio/Chart.yaml index f65d058..6710665 100644 --- a/charts/kubezero-istio/Chart.yaml +++ b/charts/kubezero-istio/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-istio description: KubeZero Umbrella Chart for Istio type: application -version: 0.5.3 +version: 0.5.4 appVersion: 1.9.3 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png diff --git a/charts/kubezero-istio/ingress-terminationgraceperiod.patch b/charts/kubezero-istio/ingress-terminationgraceperiod.patch deleted file mode 100644 index 0d3e465..0000000 --- a/charts/kubezero-istio/ingress-terminationgraceperiod.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml b/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml -index b69da65..b5137a4 100644 ---- a/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml -+++ b/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml -@@ -63,6 +63,7 @@ spec: - {{- if .Values.global.priorityClassName }} - priorityClassName: "{{ .Values.global.priorityClassName }}" - {{- end }} -+ terminationGracePeriodSeconds: 90 - {{- if .Values.global.proxy.enableCoreDump }} - initContainers: - - name: enable-core-dump diff --git a/charts/kubezero-istio/istio-discovery.patch b/charts/kubezero-istio/istio-discovery.patch deleted file mode 100644 index 9a1940c..0000000 --- a/charts/kubezero-istio/istio-discovery.patch +++ /dev/null @@ -1,16 +0,0 @@ -diff --git a/charts/kubezero-istio/charts/istio-discovery/templates/deployment.yaml b/charts/kubezero-istio/charts/istio-discovery/templates/deployment.yaml -index e4a983a..ba586de 100644 ---- a/charts/kubezero-istio/charts/istio-discovery/templates/deployment.yaml -+++ b/charts/kubezero-istio/charts/istio-discovery/templates/deployment.yaml -@@ -59,6 +59,11 @@ 
spec: - {{- end }} - securityContext: - fsGroup: 1337 -+ nodeSelector: -+ node-role.kubernetes.io/master: "" -+ tolerations: -+ - effect: NoSchedule -+ key: node-role.kubernetes.io/master - containers: - - name: discovery - {{- if contains "/" .Values.pilot.image }} diff --git a/charts/kubezero-istio/update.sh b/charts/kubezero-istio/update.sh index 16df9f1..98b2ca3 100755 --- a/charts/kubezero-istio/update.sh +++ b/charts/kubezero-istio/update.sh @@ -3,35 +3,32 @@ set -ex export ISTIO_VERSION=1.9.3 -if [ ! -d istio-$ISTIO_VERSION ]; then - NAME="istio-$ISTIO_VERSION" - URL="https://github.com/istio/istio/releases/download/${ISTIO_VERSION}/istio-${ISTIO_VERSION}-linux-amd64.tar.gz" - - curl -sL "$URL" | tar xz -fi - -# Extract control plane charts -rm -rf charts/base charts/istio-* -cp -r istio-${ISTIO_VERSION}/manifests/charts/base charts/ -cp -r istio-${ISTIO_VERSION}/manifests/charts/istio-control/istio-discovery charts/ - -# Patch for istiod to control plane -patch -p3 -i istio-discovery.patch --no-backup-if-mismatch +rm -rf istio +curl -sL "https://github.com/istio/istio/releases/download/${ISTIO_VERSION}/istio-${ISTIO_VERSION}-linux-amd64.tar.gz" | tar xz +mv istio-${ISTIO_VERSION} istio # remove unused old telemetry filters -rm -f charts/istio-discovery/templates/telemetryv2_1.[678].yaml +rm -f istio/manifests/charts/istio-control/istio-discovery/templates/telemetryv2_1.[678].yaml -# Ingress charts +# Patch +patch -p0 -i zdt.patch --no-backup-if-mismatch + +### Create kubezero istio charts + +# remove previous charts +rm -rf charts/base charts/istio-* + +# create istio main chart +cp -r istio/manifests/charts/base charts/ +cp -r istio/manifests/charts/istio-control/istio-discovery charts/ + +# Create ingress charts rm -rf ../kubezero-istio-ingress/charts/istio-* -cp -r istio-${ISTIO_VERSION}/manifests/charts/gateways/istio-ingress ../kubezero-istio-ingress/charts/ -cp -r istio-${ISTIO_VERSION}/manifests/charts/gateways/istio-ingress 
../kubezero-istio-ingress/charts/istio-private-ingress +cp -r istio/manifests/charts/gateways/istio-ingress ../kubezero-istio-ingress/charts/ +cp -r istio/manifests/charts/gateways/istio-ingress ../kubezero-istio-ingress/charts/istio-private-ingress # Rename private chart sed -i -e 's/name: istio-ingress/name: istio-private-ingress/' ../kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml -# Patch for ingress for extended termination grace period -patch -i ingress-terminationgraceperiod.patch ../kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml --no-backup-if-mismatch -patch -i ingress-terminationgraceperiod.patch ../kubezero-istio-ingress/charts/istio-private-ingress/templates/deployment.yaml --no-backup-if-mismatch - # Get matching istioctl [ -x istioctl ] && [ "$(./istioctl version --remote=false)" == $ISTIO_VERSION ] || { curl -sL https://github.com/istio/istio/releases/download/${ISTIO_VERSION}/istioctl-${ISTIO_VERSION}-linux-amd64.tar.gz | tar xz; chmod +x istioctl; } diff --git a/charts/kubezero-istio/values.yaml b/charts/kubezero-istio/values.yaml index 7b5c7b6..ad1fe62 100644 --- a/charts/kubezero-istio/values.yaml +++ b/charts/kubezero-istio/values.yaml @@ -1,6 +1,6 @@ global: # hub: docker.io/istio - # tag: 1.9.0 + # tag: 1.9.3 logAsJson: true jwtPolicy: first-party-jwt @@ -36,9 +36,7 @@ istio-discovery: meshConfig: defaultConfig: - terminationDrainDuration: 60s -# proxyMetadata: -# ISTIO_META_HTTP10: '"1"' + terminationDrainDuration: 20s accessLogFile: /dev/stdout accessLogEncoding: 'JSON' h2UpgradePolicy: 'DO_NOT_UPGRADE' diff --git a/charts/kubezero-istio/zdt.patch b/charts/kubezero-istio/zdt.patch new file mode 100644 index 0000000..0a15bb9 --- /dev/null +++ b/charts/kubezero-istio/zdt.patch @@ -0,0 +1,47 @@ +diff -turN istio/manifests/charts/gateways/istio-ingress/templates/deployment.yaml istio.zdt/manifests/charts/gateways/istio-ingress/templates/deployment.yaml +--- 
istio/manifests/charts/gateways/istio-ingress/templates/deployment.yaml 2021-04-11 01:57:29.000000000 +0200 ++++ istio.zdt/manifests/charts/gateways/istio-ingress/templates/deployment.yaml 2021-04-20 12:20:04.401862116 +0200 +@@ -17,6 +17,8 @@ + {{- if $gateway.replicaCount }} + replicas: {{ $gateway.replicaCount }} + {{- end }} ++ # Give the LB 120s to detect and take into service, should only be 40s but we are on AWS so ?? ++ minReadySeconds: 120 + {{- end }} + selector: + matchLabels: +@@ -69,6 +71,7 @@ + {{- if .Values.global.priorityClassName }} + priorityClassName: "{{ .Values.global.priorityClassName }}" + {{- end }} ++ terminationGracePeriodSeconds: 120 + {{- if .Values.global.proxy.enableCoreDump }} + initContainers: + - name: enable-core-dump +@@ -140,6 +143,11 @@ + privileged: false + readOnlyRootFilesystem: true + {{- end }} ++ #This needs kube-proxy support coming with 1.22 hopefully, cilium ? ++ #lifecycle: ++ # preStop: ++ # exec: ++ # command: ["/bin/sh","-c","sleep 30"] + readinessProbe: + failureThreshold: 30 + httpGet: +diff -turN istio/manifests/charts/istio-control/istio-discovery/templates/deployment.yaml istio.zdt/manifests/charts/istio-control/istio-discovery/templates/deployment.yaml +--- istio/manifests/charts/istio-control/istio-discovery/templates/deployment.yaml 2021-04-11 01:57:29.000000000 +0200 ++++ istio.zdt/manifests/charts/istio-control/istio-discovery/templates/deployment.yaml 2021-04-19 21:55:45.461749267 +0200 +@@ -60,6 +60,11 @@ + {{- end }} + securityContext: + fsGroup: 1337 ++ nodeSelector: ++ node-role.kubernetes.io/master: "" ++ tolerations: ++ - effect: NoSchedule ++ key: node-role.kubernetes.io/master + containers: + - name: discovery + {{- if contains "/" .Values.pilot.image }}