feat: improved update strategy and timing to reduce 5XX during istio ingress deployments

This commit is contained in:
Stefan Reimer 2021-04-20 12:49:29 +02:00
parent fd35a46b66
commit 64dd6160cb
12 changed files with 150 additions and 66 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-istio-ingress name: kubezero-istio-ingress
description: KubeZero Umbrella Chart for Istio based Ingress description: KubeZero Umbrella Chart for Istio based Ingress
type: application type: application
version: 0.5.2 version: 0.5.4
appVersion: 1.9.3 appVersion: 1.9.3
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png

View File

@ -0,0 +1,48 @@
# Custom Envoy bootstrap fragment that enables the overload manager, following
# the Envoy edge-proxy best practices:
# https://www.envoyproxy.io/docs/envoy/v1.17.1/configuration/best_practices/edge#configuring-envoy-as-an-edge-proxy
# https://github.com/istio/istio/issues/24715
#
# The fixed_heap resource monitor is configured with a maximum heap of
# 536870912 bytes (512 MiB) and is refreshed every 0.25s:
#   - at 90% heap usage the shrink_heap action is triggered
#   - at 99% heap usage the stop_accepting_requests action is triggered
#
# NOTE(review): presumably consumed through the
# sidecar.istio.io/bootstrapOverride pod annotation on the gateways - confirm.
# NOTE(review): max_heap_size_bytes presumably has to stay in line with the
# gateway container memory limit - confirm.
apiVersion: v1
kind: ConfigMap
metadata:
  name: istio-gateway-bootstrap-config
  namespace: {{ .Release.Namespace }}
data:
  custom_bootstrap.json: |
    {
      "overload_manager": {
        "actions": [
          {
            "name": "envoy.overload_actions.shrink_heap",
            "triggers": [
              {
                "name": "envoy.resource_monitors.fixed_heap",
                "threshold": {
                  "value": 0.9
                }
              }
            ]
          },
          {
            "name": "envoy.overload_actions.stop_accepting_requests",
            "triggers": [
              {
                "name": "envoy.resource_monitors.fixed_heap",
                "threshold": {
                  "value": 0.99
                }
              }
            ]
          }
        ],
        "refresh_interval": "0.25s",
        "resource_monitors": [
          {
            "name": "envoy.resource_monitors.fixed_heap",
            "typed_config": {
              "@type": "type.googleapis.com/envoy.extensions.resource_monitors.fixed_heap.v3.FixedHeapConfig",
              "max_heap_size_bytes": 536870912
            }
          }
        ]
      }
    }

View File

@ -17,6 +17,8 @@ spec:
{{- if $gateway.replicaCount }} {{- if $gateway.replicaCount }}
replicas: {{ $gateway.replicaCount }} replicas: {{ $gateway.replicaCount }}
{{- end }} {{- end }}
# Give the LB 120s to detect the new pod and take it into service; this should only take 40s, but we are on AWS, so be generous.
minReadySeconds: 120
{{- end }} {{- end }}
selector: selector:
matchLabels: matchLabels:
@ -69,7 +71,7 @@ spec:
{{- if .Values.global.priorityClassName }} {{- if .Values.global.priorityClassName }}
priorityClassName: "{{ .Values.global.priorityClassName }}" priorityClassName: "{{ .Values.global.priorityClassName }}"
{{- end }} {{- end }}
terminationGracePeriodSeconds: 90 terminationGracePeriodSeconds: 120
{{- if .Values.global.proxy.enableCoreDump }} {{- if .Values.global.proxy.enableCoreDump }}
initContainers: initContainers:
- name: enable-core-dump - name: enable-core-dump
@ -141,6 +143,11 @@ spec:
privileged: false privileged: false
readOnlyRootFilesystem: true readOnlyRootFilesystem: true
{{- end }} {{- end }}
# This needs kube-proxy support, hopefully coming with 1.22 (or possibly Cilium?)
#lifecycle:
# preStop:
# exec:
# command: ["/bin/sh","-c","sleep 30"]
readinessProbe: readinessProbe:
failureThreshold: 30 failureThreshold: 30
httpGet: httpGet:

View File

@ -17,6 +17,8 @@ spec:
{{- if $gateway.replicaCount }} {{- if $gateway.replicaCount }}
replicas: {{ $gateway.replicaCount }} replicas: {{ $gateway.replicaCount }}
{{- end }} {{- end }}
# Give the LB 120s to detect the new pod and take it into service; this should only take 40s, but we are on AWS, so be generous.
minReadySeconds: 120
{{- end }} {{- end }}
selector: selector:
matchLabels: matchLabels:
@ -69,7 +71,7 @@ spec:
{{- if .Values.global.priorityClassName }} {{- if .Values.global.priorityClassName }}
priorityClassName: "{{ .Values.global.priorityClassName }}" priorityClassName: "{{ .Values.global.priorityClassName }}"
{{- end }} {{- end }}
terminationGracePeriodSeconds: 90 terminationGracePeriodSeconds: 120
{{- if .Values.global.proxy.enableCoreDump }} {{- if .Values.global.proxy.enableCoreDump }}
initContainers: initContainers:
- name: enable-core-dump - name: enable-core-dump
@ -141,6 +143,11 @@ spec:
privileged: false privileged: false
readOnlyRootFilesystem: true readOnlyRootFilesystem: true
{{- end }} {{- end }}
# This needs kube-proxy support, hopefully coming with 1.22 (or possibly Cilium?)
#lifecycle:
# preStop:
# exec:
# command: ["/bin/sh","-c","sleep 30"]
readinessProbe: readinessProbe:
failureThreshold: 30 failureThreshold: 30
httpGet: httpGet:

View File

@ -21,6 +21,8 @@ istio-ingress:
istio-ingressgateway: istio-ingressgateway:
autoscaleEnabled: false autoscaleEnabled: false
replicaCount: 1 replicaCount: 1
rollingMaxSurge: 1
rollingMaxUnavailable: 0
resources: resources:
requests: requests:
cpu: 50m cpu: 50m
@ -35,9 +37,8 @@ istio-ingress:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
values: istio-ingressgateway values: istio-ingressgateway
type: NodePort type: NodePort
env: #podAnnotations:
TERMINATION_DRAIN_DURATION_SECONDS: '"60"' # sidecar.istio.io/bootstrapOverride: istio-gateway-bootstrap-config
# ISTIO_META_HTTP10: '"1"'
# The node selector is normally the list of nodeports, see CloudBender # The node selector is normally the list of nodeports, see CloudBender
nodeSelector: nodeSelector:
node.kubernetes.io/ingress.public: "30080_30443" node.kubernetes.io/ingress.public: "30080_30443"
@ -72,6 +73,11 @@ istio-ingress:
dnsNames: [] dnsNames: []
# - '*.example.com' # - '*.example.com'
meshConfig:
defaultConfig:
proxyMetadata:
# ISTIO_META_HTTP10: 1
istio-private-ingress: istio-private-ingress:
enabled: false enabled: false
@ -85,6 +91,8 @@ istio-private-ingress:
autoscaleEnabled: false autoscaleEnabled: false
replicaCount: 1 replicaCount: 1
rollingMaxSurge: 1
rollingMaxUnavailable: 0
resources: resources:
requests: requests:
cpu: 50m cpu: 50m
@ -99,13 +107,9 @@ istio-private-ingress:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
values: istio-private-ingressgateway values: istio-private-ingressgateway
type: NodePort type: NodePort
env:
TERMINATION_DRAIN_DURATION_SECONDS: '"60"'
# ISTIO_META_HTTP10: '"1"'
nodeSelector: nodeSelector:
node.kubernetes.io/ingress.private: "31080_31443" node.kubernetes.io/ingress.private: "31080_31443"
#nodeSelector: "31080_31443_31671_31672_31224" #nodeSelector: "31080_31443_31671_31672_31224"
ports: ports:
- name: status-port - name: status-port
port: 15021 port: 15021
@ -149,3 +153,7 @@ istio-private-ingress:
dnsNames: [] dnsNames: []
# - '*.example.com' # - '*.example.com'
meshConfig:
defaultConfig:
proxyMetadata:
# ISTIO_META_HTTP10: 1

View File

@ -1,2 +1,2 @@
istioctl istioctl
istio-?.?.? istio

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-istio name: kubezero-istio
description: KubeZero Umbrella Chart for Istio description: KubeZero Umbrella Chart for Istio
type: application type: application
version: 0.5.3 version: 0.5.4
appVersion: 1.9.3 appVersion: 1.9.3
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png

View File

@ -1,12 +0,0 @@
diff --git a/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml b/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml
index b69da65..b5137a4 100644
--- a/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml
+++ b/charts/kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml
@@ -63,6 +63,7 @@ spec:
{{- if .Values.global.priorityClassName }}
priorityClassName: "{{ .Values.global.priorityClassName }}"
{{- end }}
+ terminationGracePeriodSeconds: 90
{{- if .Values.global.proxy.enableCoreDump }}
initContainers:
- name: enable-core-dump

View File

@ -1,16 +0,0 @@
diff --git a/charts/kubezero-istio/charts/istio-discovery/templates/deployment.yaml b/charts/kubezero-istio/charts/istio-discovery/templates/deployment.yaml
index e4a983a..ba586de 100644
--- a/charts/kubezero-istio/charts/istio-discovery/templates/deployment.yaml
+++ b/charts/kubezero-istio/charts/istio-discovery/templates/deployment.yaml
@@ -59,6 +59,11 @@ spec:
{{- end }}
securityContext:
fsGroup: 1337
+ nodeSelector:
+ node-role.kubernetes.io/master: ""
+ tolerations:
+ - effect: NoSchedule
+ key: node-role.kubernetes.io/master
containers:
- name: discovery
{{- if contains "/" .Values.pilot.image }}

View File

@ -3,35 +3,32 @@ set -ex
export ISTIO_VERSION=1.9.3 export ISTIO_VERSION=1.9.3
if [ ! -d istio-$ISTIO_VERSION ]; then rm -rf istio
NAME="istio-$ISTIO_VERSION" curl -sL "https://github.com/istio/istio/releases/download/${ISTIO_VERSION}/istio-${ISTIO_VERSION}-linux-amd64.tar.gz" | tar xz
URL="https://github.com/istio/istio/releases/download/${ISTIO_VERSION}/istio-${ISTIO_VERSION}-linux-amd64.tar.gz" mv istio-${ISTIO_VERSION} istio
curl -sL "$URL" | tar xz
fi
# Extract control plane charts
rm -rf charts/base charts/istio-*
cp -r istio-${ISTIO_VERSION}/manifests/charts/base charts/
cp -r istio-${ISTIO_VERSION}/manifests/charts/istio-control/istio-discovery charts/
# Patch for istiod to control plane
patch -p3 -i istio-discovery.patch --no-backup-if-mismatch
# remove unused old telemetry filters # remove unused old telemetry filters
rm -f charts/istio-discovery/templates/telemetryv2_1.[678].yaml rm -f istio/manifests/charts/istio-control/istio-discovery/templates/telemetryv2_1.[678].yaml
# Ingress charts # Patch
patch -p0 -i zdt.patch --no-backup-if-mismatch
### Create kubezero istio charts
# remove previous charts
rm -rf charts/base charts/istio-*
# create istio main chart
cp -r istio/manifests/charts/base charts/
cp -r istio/manifests/charts/istio-control/istio-discovery charts/
# Create ingress charts
rm -rf ../kubezero-istio-ingress/charts/istio-* rm -rf ../kubezero-istio-ingress/charts/istio-*
cp -r istio-${ISTIO_VERSION}/manifests/charts/gateways/istio-ingress ../kubezero-istio-ingress/charts/ cp -r istio/manifests/charts/gateways/istio-ingress ../kubezero-istio-ingress/charts/
cp -r istio-${ISTIO_VERSION}/manifests/charts/gateways/istio-ingress ../kubezero-istio-ingress/charts/istio-private-ingress cp -r istio/manifests/charts/gateways/istio-ingress ../kubezero-istio-ingress/charts/istio-private-ingress
# Rename private chart # Rename private chart
sed -i -e 's/name: istio-ingress/name: istio-private-ingress/' ../kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml sed -i -e 's/name: istio-ingress/name: istio-private-ingress/' ../kubezero-istio-ingress/charts/istio-private-ingress/Chart.yaml
# Patch for ingress for extended termination grace period
patch -i ingress-terminationgraceperiod.patch ../kubezero-istio-ingress/charts/istio-ingress/templates/deployment.yaml --no-backup-if-mismatch
patch -i ingress-terminationgraceperiod.patch ../kubezero-istio-ingress/charts/istio-private-ingress/templates/deployment.yaml --no-backup-if-mismatch
# Get matching istioctl # Get matching istioctl
[ -x istioctl ] && [ "$(./istioctl version --remote=false)" == $ISTIO_VERSION ] || { curl -sL https://github.com/istio/istio/releases/download/${ISTIO_VERSION}/istioctl-${ISTIO_VERSION}-linux-amd64.tar.gz | tar xz; chmod +x istioctl; } [ -x istioctl ] && [ "$(./istioctl version --remote=false)" == $ISTIO_VERSION ] || { curl -sL https://github.com/istio/istio/releases/download/${ISTIO_VERSION}/istioctl-${ISTIO_VERSION}-linux-amd64.tar.gz | tar xz; chmod +x istioctl; }

View File

@ -1,6 +1,6 @@
global: global:
# hub: docker.io/istio # hub: docker.io/istio
# tag: 1.9.0 # tag: 1.9.3
logAsJson: true logAsJson: true
jwtPolicy: first-party-jwt jwtPolicy: first-party-jwt
@ -36,9 +36,7 @@ istio-discovery:
meshConfig: meshConfig:
defaultConfig: defaultConfig:
terminationDrainDuration: 60s terminationDrainDuration: 20s
# proxyMetadata:
# ISTIO_META_HTTP10: '"1"'
accessLogFile: /dev/stdout accessLogFile: /dev/stdout
accessLogEncoding: 'JSON' accessLogEncoding: 'JSON'
h2UpgradePolicy: 'DO_NOT_UPGRADE' h2UpgradePolicy: 'DO_NOT_UPGRADE'

View File

@ -0,0 +1,47 @@
diff -turN istio/manifests/charts/gateways/istio-ingress/templates/deployment.yaml istio.zdt/manifests/charts/gateways/istio-ingress/templates/deployment.yaml
--- istio/manifests/charts/gateways/istio-ingress/templates/deployment.yaml 2021-04-11 01:57:29.000000000 +0200
+++ istio.zdt/manifests/charts/gateways/istio-ingress/templates/deployment.yaml 2021-04-20 12:20:04.401862116 +0200
@@ -17,6 +17,8 @@
{{- if $gateway.replicaCount }}
replicas: {{ $gateway.replicaCount }}
{{- end }}
+ # Give the LB 120s to detect the new pod and take it into service; this should only take 40s, but we are on AWS, so be generous.
+ minReadySeconds: 120
{{- end }}
selector:
matchLabels:
@@ -69,6 +71,7 @@
{{- if .Values.global.priorityClassName }}
priorityClassName: "{{ .Values.global.priorityClassName }}"
{{- end }}
+ terminationGracePeriodSeconds: 120
{{- if .Values.global.proxy.enableCoreDump }}
initContainers:
- name: enable-core-dump
@@ -140,6 +143,11 @@
privileged: false
readOnlyRootFilesystem: true
{{- end }}
+ #This needs kube-proxy support coming with 1.22 hopefully, cilium ?
+ #lifecycle:
+ # preStop:
+ # exec:
+ # command: ["/bin/sh","-c","sleep 30"]
readinessProbe:
failureThreshold: 30
httpGet:
diff -turN istio/manifests/charts/istio-control/istio-discovery/templates/deployment.yaml istio.zdt/manifests/charts/istio-control/istio-discovery/templates/deployment.yaml
--- istio/manifests/charts/istio-control/istio-discovery/templates/deployment.yaml 2021-04-11 01:57:29.000000000 +0200
+++ istio.zdt/manifests/charts/istio-control/istio-discovery/templates/deployment.yaml 2021-04-19 21:55:45.461749267 +0200
@@ -60,6 +60,11 @@
{{- end }}
securityContext:
fsGroup: 1337
+ nodeSelector:
+ node-role.kubernetes.io/master: ""
+ tolerations:
+ - effect: NoSchedule
+ key: node-role.kubernetes.io/master
containers:
- name: discovery
{{- if contains "/" .Values.pilot.image }}