From dd9e465ead901958e31c7d590c4958d1391fabc5 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Thu, 17 Sep 2020 19:24:24 +0100 Subject: [PATCH 01/10] Enable TCP keepalive for fluentd listener --- charts/kubezero-logging/Chart.yaml | 2 +- charts/kubezero-logging/values.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/charts/kubezero-logging/Chart.yaml b/charts/kubezero-logging/Chart.yaml index 53a5e1e..2b1da0d 100644 --- a/charts/kubezero-logging/Chart.yaml +++ b/charts/kubezero-logging/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-logging description: KubeZero Umbrella Chart for complete EFK stack type: application -version: 0.3.5 +version: 0.3.6 appVersion: 1.2.1 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png diff --git a/charts/kubezero-logging/values.yaml b/charts/kubezero-logging/values.yaml index 00f6e77..b42275e 100644 --- a/charts/kubezero-logging/values.yaml +++ b/charts/kubezero-logging/values.yaml @@ -106,6 +106,7 @@ fluentd: port 24224 bind 0.0.0.0 skip_invalid_event true + send_keepalive_packet true cert_path /mnt/fluentd-certs/tls.crt private_key_path /mnt/fluentd-certs/tls.key -- 2.40.1 From 182ae141a0928141d21de7d46af0a3b1c279bfd7 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Thu, 17 Sep 2020 19:44:34 +0100 Subject: [PATCH 02/10] Revert TCP keepalive for fluentd listener --- charts/kubezero-logging/values.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/charts/kubezero-logging/values.yaml b/charts/kubezero-logging/values.yaml index b42275e..00f6e77 100644 --- a/charts/kubezero-logging/values.yaml +++ b/charts/kubezero-logging/values.yaml @@ -106,7 +106,6 @@ fluentd: port 24224 bind 0.0.0.0 skip_invalid_event true - send_keepalive_packet true cert_path /mnt/fluentd-certs/tls.crt private_key_path /mnt/fluentd-certs/tls.key -- 2.40.1 From b4c2195eef0bf25c12dccab141db6c5a85952fdb Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Thu, 17 Sep 2020 22:25:09 +0100 Subject: [PATCH 03/10] Add EnvoyFilter to enable tcp keepalive for all Ingress Envoys --- charts/kubezero-istio/Chart.yaml | 2 +- charts/kubezero-istio/README.md | 2 +- .../kubezero-istio/templates/envoyfilter.yaml | 68 +++++++++++++++++++ 3 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 charts/kubezero-istio/templates/envoyfilter.yaml diff --git a/charts/kubezero-istio/Chart.yaml b/charts/kubezero-istio/Chart.yaml index 22dbe8d..a31a4cb 100644 --- a/charts/kubezero-istio/Chart.yaml +++ b/charts/kubezero-istio/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-istio description: KubeZero Umbrella Chart for Istio type: application -version: 0.3.2 +version: 0.3.3 appVersion: 1.7.1 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png diff --git a/charts/kubezero-istio/README.md b/charts/kubezero-istio/README.md index b3ac36e..cf39c80 100644 --- a/charts/kubezero-istio/README.md +++ b/charts/kubezero-istio/README.md @@ -5,7 +5,7 @@ KubeZero Umbrella Chart for Istio Installs Istio Operator and KubeZero Istio profile -Current chart version is `0.3.0` +Current chart version is `0.3.3` Source code can be found [here](https://kubezero.com) diff --git a/charts/kubezero-istio/templates/envoyfilter.yaml b/charts/kubezero-istio/templates/envoyfilter.yaml new file mode 100644 index 0000000..3351376 --- /dev/null +++ b/charts/kubezero-istio/templates/envoyfilter.yaml @@ -0,0 +1,68 @@ +apiVersion: networking.istio.io/v1alpha3 +kind: EnvoyFilter +metadata: + name: ingressgateway-listener-tcp-keepalive + namespace: istio-system +spec: + workloadSelector: + labels: + istio: ingressgateway + configPatches: + - applyTo: LISTENER + patch: + operation: MERGE + value: + socket_options: + # SOL_SOCKET = 1 + # SO_KEEPALIVE = 9 + - level: 1 + name: 9 + int_value: 1 + state: STATE_LISTENING + # IPPROTO_TCP = 6 + # TCP_KEEPIDLE = 4 + - level: 6 + name: 4 + int_value: 60 + state: STATE_LISTENING + # TCP_KEEPINTVL = 5 + - level: 6 + name: 5 + int_value: 60 + state: STATE_LISTENING + +{{- if .Values.ingress.private.enabled }} +--- +apiVersion: networking.istio.io/v1alpha3 +kind: EnvoyFilter +metadata: + name: private-ingressgateway-listener-tcp-keepalive + namespace: istio-system +spec: + workloadSelector: + labels: + istio: private-ingressgateway + configPatches: + - applyTo: LISTENER + patch: + operation: MERGE + value: + socket_options: + # SOL_SOCKET = 1 + # SO_KEEPALIVE = 9 + - level: 1 + name: 9 + int_value: 1 + state: STATE_LISTENING + # IPPROTO_TCP = 6 + # TCP_KEEPIDLE = 4 + - level: 6 + name: 4 + int_value: 60 + state: STATE_LISTENING + # TCP_KEEPINTVL = 5 + - level: 6 + name: 5 + int_value: 60 + state: STATE_LISTENING +{{- end }} -- 2.40.1 From 85837c1666063f20c84e550ca329a7ce132ac2e6 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Fri, 18 Sep 2020 13:09:18 +0100 Subject: [PATCH 04/10] Bump argocd to 1.7.5 as 1.7.4 has a deadlock CPU issue --- charts/kubezero-argo-cd/Chart.yaml | 2 +- charts/kubezero-argo-cd/values.yaml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/charts/kubezero-argo-cd/Chart.yaml b/charts/kubezero-argo-cd/Chart.yaml index 72eb752..b6109d4 100644 --- a/charts/kubezero-argo-cd/Chart.yaml +++ b/charts/kubezero-argo-cd/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 description: KubeZero ArgoCD Helm chart to install ArgoCD itself and the KubeZero ArgoCD Application name: kubezero-argo-cd -version: 0.5.2 +version: 0.5.3 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: diff --git a/charts/kubezero-argo-cd/values.yaml b/charts/kubezero-argo-cd/values.yaml index 75388a1..572732c 100644 --- a/charts/kubezero-argo-cd/values.yaml +++ b/charts/kubezero-argo-cd/values.yaml @@ -29,6 +29,10 @@ argo-cd: # argocdServerAdminPassword: "$2a$10$ivKzaXVxMqdeDSfS3nqi1Od3iDbnL7oXrixzDfZFRHlXHnAG6LydG" # argocdServerAdminPasswordMtime: "2020-04-24T15:33:09BST" + global: + image: + tag: v1.7.5 + controller: args: statusProcessors: "2" -- 2.40.1 From f753a1fc7116494d14ef0e4829c3550a85e077fd Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Fri, 18 Sep 2020 14:21:39 +0100 Subject: [PATCH 05/10] Slightly allow ArgoCD a bit more processing --- charts/kubezero-argo-cd/README.md | 7 ++++--- charts/kubezero-argo-cd/values.yaml | 4 ++-- charts/kubezero-logging/README.md | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/charts/kubezero-argo-cd/README.md b/charts/kubezero-argo-cd/README.md index 0c49f57..d0df168 100644 --- a/charts/kubezero-argo-cd/README.md +++ b/charts/kubezero-argo-cd/README.md @@ -2,7 +2,7 @@ kubezero-argo-cd ================ KubeZero ArgoCD Helm chart to install ArgoCD itself and the KubeZero ArgoCD Application -Current chart version is `0.5.1` +Current chart version is `0.5.3` Source code can be found [here](https://kubezero.com) @@ -25,12 +25,13 @@ Source code can be found [here](https://kubezero.com) | argo-cd.controller.metrics.serviceMonitor.enabled | bool | `true` | | | argo-cd.controller.metrics.serviceMonitor.namespace | string | `"monitoring"` | | | argo-cd.controller.nodeSelector."node-role.kubernetes.io/master" | string | `""` | | -| argo-cd.controller.resources.limits.memory | string | `"512Mi"` | | +| argo-cd.controller.resources.limits.memory | string | `"1536Mi"` | | | argo-cd.controller.resources.requests.cpu | string | `"100m"` | | -| argo-cd.controller.resources.requests.memory | string | `"192Mi"` | | +| argo-cd.controller.resources.requests.memory | string | `"256Mi"` | | | argo-cd.controller.tolerations[0].effect | string | `"NoSchedule"` | | | argo-cd.controller.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | | | argo-cd.dex.enabled | bool | `false` | | +| argo-cd.global.image.tag | string | `"v1.7.5"` | | | argo-cd.installCRDs | bool | `false` | | | argo-cd.istio.enabled | bool | `false` | Deploy Istio VirtualService to expose ArgoCD | | argo-cd.istio.gateway | string | `"istio-system/ingressgateway"` | Name of the Istio gateway to add the VirtualService to | diff --git a/charts/kubezero-argo-cd/values.yaml b/charts/kubezero-argo-cd/values.yaml index 572732c..0f01c2e 100644 --- a/charts/kubezero-argo-cd/values.yaml +++ b/charts/kubezero-argo-cd/values.yaml @@ -35,8 +35,8 @@ argo-cd: controller: args: - statusProcessors: "2" - operationProcessors: "1" + statusProcessors: "4" + operationProcessors: "2" appResyncPeriod: "300" metrics: diff --git a/charts/kubezero-logging/README.md b/charts/kubezero-logging/README.md index fc8ca85..89954b1 100644 --- a/charts/kubezero-logging/README.md +++ b/charts/kubezero-logging/README.md @@ -2,7 +2,7 @@ kubezero-logging ================ KubeZero Umbrella Chart for complete EFK stack -Current chart version is `0.3.5` +Current chart version is `0.3.6` Source code can be found [here](https://kubezero.com) -- 2.40.1 From 8210c7951cb87dcaeef310a628c7a8e81343d0d3 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Fri, 18 Sep 2020 14:57:33 +0100 Subject: [PATCH 06/10] Make KubeZero bootstrap flow work again --- deploy/deploy.sh | 14 ++++++-------- deploy/templates/values.yaml | 18 +++++++++--------- deploy/values-step-6.yaml | 3 +++ 3 files changed, 18 insertions(+), 17 deletions(-) create mode 100644 deploy/values-step-6.yaml diff --git a/deploy/deploy.sh b/deploy/deploy.sh index b1ea4e9..87bc412 100755 --- a/deploy/deploy.sh +++ b/deploy/deploy.sh @@ -8,7 +8,7 @@ function wait_for() { local TRIES=0 while true; do $@ && break - [ $TRIES -eq 100 ] && return 1 + [ $TRIES -eq 200 ] && return 1 let TRIES=$TRIES+1 sleep 3 done @@ -60,12 +60,12 @@ EOF wait_for kubectl get Issuer -n kube-system kubezero-local-ca-issuer 2>/dev/null 1>&2 wait_for kubectl get ClusterIssuer letsencrypt-dns-prod 2>/dev/null 1>&2 kubectl wait --for=condition=Ready -n kube-system Issuer/kubezero-local-ca-issuer - kubectl wait --for=condition=Ready ClusterIssuer/letsencrypt-dns-prod fi # Now that we have the cert-manager webhook, get the kiam certs in place but do NOT deploy kiam yet helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml -f $DEPLOY_DIR/values-step-3.yaml > generated-values.yaml helm upgrade -n argocd kubezero kubezero/kubezero-argo-cd -f generated-values.yaml + kubectl wait --for=condition=Ready -n kube-system certificates/kiam-server # Now lets make sure kiam is working helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml -f $DEPLOY_DIR/values-step-4.yaml > generated-values.yaml @@ -79,12 +79,10 @@ EOF wait_for kubectl get deployment -n istio-operator istio-operator 2>/dev/null 1>&2 kubectl rollout status deployment -n istio-operator istio-operator - # Todo: Now we need to wait till all is synced and healthy ... argocd cli or kubectl ? - # Wait for aws-ebs or kiam to be all ready, or all pods running ? - - # Todo: - # - integrate Prometheus-Grafana - # - integrate ES based logging + # Metrics + helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml -f $DEPLOY_DIR/values-step-6.yaml > generated-values.yaml + helm upgrade -n argocd kubezero kubezero/kubezero-argo-cd -f generated-values.yaml + wait_for kubectl get crds servicemonitors.monitoring.coreos.com 2>/dev/null 1>&2 # Finally we could enable the actual config and deploy all helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml > generated-values.yaml diff --git a/deploy/templates/values.yaml b/deploy/templates/values.yaml index 784dc5c..b971aad 100644 --- a/deploy/templates/values.yaml +++ b/deploy/templates/values.yaml @@ -8,7 +8,7 @@ kubezero: values: network: {{ default "vxlan" .Values.calico.network }} mtu: {{ default "8941" .Values.calico.mtu }} - prometheus: {{ default .Values.metrics.enabled .Values.metrics.ready }} + prometheus: {{ and .Values.metrics.enabled .Values.metrics.ready }} cert-manager: enabled: {{ index .Values "cert-manager" "enabled" }} values: @@ -56,11 +56,11 @@ kubezero: replicas: {{ ternary 2 1 .Values.HighAvailableControlplane }} prometheus: servicemonitor: - enabled: {{ default .Values.metrics.enabled .Values.metrics.ready }} + enabled: {{ and .Values.metrics.enabled .Values.metrics.ready }} agent: prometheus: servicemonitor: - enabled: {{ default .Values.metrics.enabled .Values.metrics.ready }} + enabled: {{ and .Values.metrics.enabled .Values.metrics.ready }} {{- if and .Values.kiam.enabled .Values.kiam.ready }} # AWS only components @@ -146,7 +146,7 @@ kubezero: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} - prometheus: {{ default .Values.metrics.enabled .Values.metrics.ready }} + prometheus: {{ and .Values.metrics.enabled .Values.metrics.ready }} {{- if .Values.logging.es.s3Snapshot }} s3Snapshot: @@ -166,7 +166,7 @@ kubezero: fluentd: enabled: {{ .Values.logging.fluentd.enabled }} metrics: - enabled: {{ default .Values.metrics.enabled .Values.metrics.ready }} + enabled: {{ and .Values.metrics.enabled .Values.metrics.ready }} url: {{ .Values.logging.fluentd.url }} {{- if and .Values.logging.fluentd.istio .Values.istio.enabled .Values.istio.ready }} istio: @@ -178,7 +178,7 @@ kubezero: fluent-bit: enabled: {{ index .Values.logging "fluent-bit" "enabled" }} metrics: - enabled: {{ default .Values.metrics.enabled .Values.metrics.ready }} + enabled: {{ and .Values.metrics.enabled .Values.metrics.ready }} {{- if index .Values.logging "fluent-bit" "config" }} config: {{- with index .Values.logging "fluent-bit" "config" }} @@ -189,13 +189,13 @@ kubezero: argo-cd: controller: metrics: - enabled: {{ default .Values.metrics.enabled .Values.metrics.ready }} + enabled: {{ and .Values.metrics.enabled .Values.metrics.ready }} repoServer: metrics: - enabled: {{ default .Values.metrics.enabled .Values.metrics.ready }} + enabled: {{ and .Values.metrics.enabled .Values.metrics.ready }} server: metrics: - enabled: {{ default .Values.metrics.enabled .Values.metrics.ready }} + enabled: {{ and .Values.metrics.enabled .Values.metrics.ready }} {{- with index .Values "argo-cd" "server" }} {{- toYaml . | nindent 4 }} {{- end }} diff --git a/deploy/values-step-6.yaml b/deploy/values-step-6.yaml new file mode 100644 index 0000000..a97799e --- /dev/null +++ b/deploy/values-step-6.yaml @@ -0,0 +1,3 @@ +metrics: + enabled: true + ready: false -- 2.40.1 From 1a94359966a1f797f1570c5a38f1327fe41a4292 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Fri, 18 Sep 2020 15:04:33 +0100 Subject: [PATCH 07/10] Keep logging disabled after metrics to prevent rolling ES --- deploy/values-step-5.yaml | 3 +++ deploy/values-step-6.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/deploy/values-step-5.yaml b/deploy/values-step-5.yaml index d37a3c3..f1bfe0c 100644 --- a/deploy/values-step-5.yaml +++ b/deploy/values-step-5.yaml @@ -4,3 +4,6 @@ istio: metrics: enabled: false ready: false + +logging: + enabled: false diff --git a/deploy/values-step-6.yaml b/deploy/values-step-6.yaml index a97799e..5b08cd7 100644 --- a/deploy/values-step-6.yaml +++ b/deploy/values-step-6.yaml @@ -1,3 +1,6 @@ metrics: enabled: true ready: false + +logging: + enabled: false -- 2.40.1 From 4a918f6d83613b64c432b085a4671b7604b2c0ea Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Fri, 18 Sep 2020 16:12:52 +0100 Subject: [PATCH 08/10] Logging fixes for NOT using nameoverride --- charts/kubezero-logging/values.yaml | 2 ++ deploy/templates/values.yaml | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/charts/kubezero-logging/values.yaml b/charts/kubezero-logging/values.yaml index 00f6e77..3f0f51d 100644 --- a/charts/kubezero-logging/values.yaml +++ b/charts/kubezero-logging/values.yaml @@ -70,6 +70,7 @@ fluentd: namespace: monitoring output: + # Default should be "logging-kubezero-logging-es-http" if fullnameOverride is NOT used host: logging-es-http shared_key: "cloudbender" @@ -78,6 +79,7 @@ fluentd: OUTPUT_USER: elastic OUTPUT_SSL_VERIFY: "false" + # Same here the secret names change if fullnameOverride is not used !! extraEnvVars: - name: OUTPUT_PASSWORD valueFrom: diff --git a/deploy/templates/values.yaml b/deploy/templates/values.yaml index b971aad..eed4491 100644 --- a/deploy/templates/values.yaml +++ b/deploy/templates/values.yaml @@ -168,6 +168,14 @@ kubezero: metrics: enabled: {{ and .Values.metrics.enabled .Values.metrics.ready }} url: {{ .Values.logging.fluentd.url }} + {{- if .Values.logging.fluentd.output }} + output: + host: {{ .Values.logging.fluentd.output.host }} + {{- end }} + {{- if .Values.logging.fluentd.extraEnvVars }} + extraEnvVars: + {{- toYaml .Values.logging.fluentd.extraEnvVars | nindent 10 }} + {{- end }} {{- if and .Values.logging.fluentd.istio .Values.istio.enabled .Values.istio.ready }} istio: {{- with .Values.logging.fluentd.istio }} -- 2.40.1 From a5952f850dbc974cb57b8bc3c721acbb8264c827 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Fri, 18 Sep 2020 16:18:59 +0100 Subject: [PATCH 09/10] Make the kiam annotate namespace job optional --- charts/kubezero-kiam/Chart.yaml | 2 +- charts/kubezero-kiam/templates/sync-ns.yaml | 2 ++ charts/kubezero-kiam/values.yaml | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/charts/kubezero-kiam/Chart.yaml b/charts/kubezero-kiam/Chart.yaml index 812628f..ba2ddfc 100644 --- a/charts/kubezero-kiam/Chart.yaml +++ b/charts/kubezero-kiam/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-kiam description: KubeZero Umbrella Chart for Kiam type: application -version: 0.2.10 +version: 0.2.11 appVersion: 3.6 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png diff --git a/charts/kubezero-kiam/templates/sync-ns.yaml b/charts/kubezero-kiam/templates/sync-ns.yaml index cc270a0..b14d982 100644 --- a/charts/kubezero-kiam/templates/sync-ns.yaml +++ b/charts/kubezero-kiam/templates/sync-ns.yaml @@ -1,3 +1,4 @@ +{{- if .Values.annotateKubeSystemNameSpace }} apiVersion: v1 kind: ServiceAccount metadata: @@ -63,3 +64,4 @@ spec: tolerations: - effect: NoSchedule key: node-role.kubernetes.io/master +{{- end }} diff --git a/charts/kubezero-kiam/values.yaml b/charts/kubezero-kiam/values.yaml index b96d4f5..d09c24b 100644 --- a/charts/kubezero-kiam/values.yaml +++ b/charts/kubezero-kiam/values.yaml @@ -1,3 +1,5 @@ +annotateKubeSystemNameSpace: false + kiam: enabled: true server: -- 2.40.1 From 6d769e3a22f02d2e609c226974b1b85377acf9b7 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Fri, 18 Sep 2020 16:20:45 +0100 Subject: [PATCH 10/10] Add kube-system ns annotate to boot flow --- deploy/deploy.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deploy/deploy.sh b/deploy/deploy.sh index 87bc412..40e3121 100755 --- a/deploy/deploy.sh +++ b/deploy/deploy.sh @@ -62,6 +62,9 @@ EOF kubectl wait --for=condition=Ready -n kube-system Issuer/kubezero-local-ca-issuer fi + # Make sure kube-system is allowed to kiam + kubectl annotate --overwrite namespace kube-system 'iam.amazonaws.com/permitted=.*' + # Now that we have the cert-manager webhook, get the kiam certs in place but do NOT deploy kiam yet helm template $DEPLOY_DIR -f values.yaml -f cloudbender.yaml -f $DEPLOY_DIR/values-step-3.yaml > generated-values.yaml helm upgrade -n argocd kubezero kubezero/kubezero-argo-cd -f generated-values.yaml -- 2.40.1