From 144d826aafd5cf3a59a7611b47d91c62eb33c70f Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Wed, 4 Dec 2024 18:40:10 +0000 Subject: [PATCH] fix: various tweaks and fixes --- charts/kubezero-addons/values.yaml | 3 ++ charts/kubezero-metrics/README.md | 7 +-- .../jsonnet/jsonnetfile.lock.json | 32 ++++++------- .../kubezero-metrics/jsonnet/rules.libsonnet | 45 +++++++++++++++---- .../jsonnet/rules/kubernetes-prometheusRule | 18 ++++++-- .../rules/prometheus-operator-prometheusRule | 2 +- .../jsonnet/rules/prometheus-prometheusRule | 6 +-- .../templates/rules/kubernetes.yaml | 14 ++++-- .../templates/rules/prometheus.yaml | 4 +- .../templates/rules/zdt-inhibitors.yaml | 19 -------- charts/kubezero-metrics/values.yaml | 6 +-- charts/kubezero/templates/addons.yaml | 19 +++++--- charts/kubezero/templates/operators.yaml | 10 +++++ charts/kubezero/templates/telemetry.yaml | 3 ++ 14 files changed, 120 insertions(+), 68 deletions(-) delete mode 100644 charts/kubezero-metrics/templates/rules/zdt-inhibitors.yaml diff --git a/charts/kubezero-addons/values.yaml b/charts/kubezero-addons/values.yaml index d545a2f3..7e28e40f 100644 --- a/charts/kubezero-addons/values.yaml +++ b/charts/kubezero-addons/values.yaml @@ -227,7 +227,10 @@ cluster-autoscaler: scan-interval: 30s skip-nodes-with-local-storage: false balance-similar-node-groups: true + ignore-daemonsets-utilization: true ignore-taint: "node.cilium.io/agent-not-ready" + # Disable for non-clustered control-plane + # leader-elect: false #securityContext: # runAsNonRoot: true diff --git a/charts/kubezero-metrics/README.md b/charts/kubezero-metrics/README.md index a0f02985..47f98bf1 100644 --- a/charts/kubezero-metrics/README.md +++ b/charts/kubezero-metrics/README.md @@ -92,10 +92,11 @@ Kubernetes: `>= 1.26.0` | kube-prometheus-stack.alertmanager.config.inhibit_rules[2].target_matchers[0] | string | `"severity = info"` | | | kube-prometheus-stack.alertmanager.config.inhibit_rules[3].source_matchers[0] | string | `"alertname 
= ClusterAutoscalerNodeGroupsEnabled"` | | | kube-prometheus-stack.alertmanager.config.inhibit_rules[3].target_matchers[0] | string | `"alertname =~ \"KubeCPUOvercommit|KubeMemoryOvercommit\""` | | -| kube-prometheus-stack.alertmanager.config.route.group_by[0] | string | `"severity"` | | -| kube-prometheus-stack.alertmanager.config.route.group_by[1] | string | `"clusterName"` | | +| kube-prometheus-stack.alertmanager.config.route.group_by[0] | string | `"alertname"` | | +| kube-prometheus-stack.alertmanager.config.route.group_by[1] | string | `"severity"` | | +| kube-prometheus-stack.alertmanager.config.route.group_by[2] | string | `"status"` | | | kube-prometheus-stack.alertmanager.config.route.group_interval | string | `"5m"` | | -| kube-prometheus-stack.alertmanager.config.route.group_wait | string | `"10s"` | | +| kube-prometheus-stack.alertmanager.config.route.group_wait | string | `"30s"` | | | kube-prometheus-stack.alertmanager.config.route.repeat_interval | string | `"4h"` | | | kube-prometheus-stack.alertmanager.config.route.routes[0].matchers[0] | string | `"severity = none"` | | | kube-prometheus-stack.alertmanager.config.route.routes[0].receiver | string | `"null"` | | diff --git a/charts/kubezero-metrics/jsonnet/jsonnetfile.lock.json b/charts/kubezero-metrics/jsonnet/jsonnetfile.lock.json index 2e2df81b..64c88cb6 100644 --- a/charts/kubezero-metrics/jsonnet/jsonnetfile.lock.json +++ b/charts/kubezero-metrics/jsonnet/jsonnetfile.lock.json @@ -18,7 +18,7 @@ "subdir": "contrib/mixin" } }, - "version": "19aa0dbe8fd6317a237bae9b6ea52a4f1b445b19", + "version": "a45cc4961e827009dba7d164e441a0ebe2eef870", "sum": "IXI3LQIT9NmTPJAk8WLUJd5+qZfcGpeNCyWIK7oEpws=" }, { @@ -88,7 +88,7 @@ "subdir": "grafana-builder" } }, - "version": "5a6b86b475e427b2dbd9e4af0bcafbb6da0507a5", + "version": "767befa8fb46a07be516dec2777d7d89909a529d", "sum": "yxqWcq/N3E/a/XreeU6EuE6X7kYPnG0AspAQFKOjASo=" }, { @@ -118,8 +118,8 @@ "subdir": "" } }, - "version": 
"bdbf7f45cedf37d07567be7519fa4139043f9335", - "sum": "j4EAKfqkbPvBFGnBjt4hex2bdNHPpuFWrCxfq5L6EkU=" + "version": "a3fbf21977deb89b7d843eb8371170c011ea6835", + "sum": "57zW2IGJ9zbYd8BI0qe6JkoWTRSMNiBUWC6+YcnEsWo=" }, { "source": { @@ -128,7 +128,7 @@ "subdir": "jsonnet/kube-state-metrics" } }, - "version": "17151aca659e0659259b5e1f5675acf849281ade", + "version": "32e7727ff4613b0f55dfc18aff15afb8c04d03c5", "sum": "lO7jUSzAIy8Yk9pOWJIWgPRhubkWzVh56W6wtYfbVH4=" }, { @@ -138,7 +138,7 @@ "subdir": "jsonnet/kube-state-metrics-mixin" } }, - "version": "17151aca659e0659259b5e1f5675acf849281ade", + "version": "32e7727ff4613b0f55dfc18aff15afb8c04d03c5", "sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c=" }, { @@ -148,8 +148,8 @@ "subdir": "jsonnet/kube-prometheus" } }, - "version": "c503e5cc5403dd5d56b1c0c5933827baee64aeaf", - "sum": "fJqINQiYJPmllXFFO+Hl5HrPYANMbhHFUQ28tl0Vi00=" + "version": "7e5a571a3fb735c78e17c76a637eb7e8bb5dd086", + "sum": "uTw/Mj+X91S+oqUpAX81xcfWPDlox0tdSZY/YBw7nGE=" }, { "source": { @@ -158,7 +158,7 @@ "subdir": "jsonnet/mixin" } }, - "version": "e951bd3037a053fea681510ccde211c28dc657e1", + "version": "a366602bacb2c8d773a9cee058b6971b8d2e3732", "sum": "gi+knjdxs2T715iIQIntrimbHRgHnpM8IFBJDD1gYfs=", "name": "prometheus-operator-mixin" }, @@ -169,8 +169,8 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "e951bd3037a053fea681510ccde211c28dc657e1", - "sum": "YOJjmladGD1PcgNae0h88Mm235CsZSfwf2a4DIcMJFU=" + "version": "a366602bacb2c8d773a9cee058b6971b8d2e3732", + "sum": "z0/lCiMusMHTqntsosMVGYkVcSZjCpyZBmUMVUsK5nA=" }, { "source": { @@ -179,7 +179,7 @@ "subdir": "doc/alertmanager-mixin" } }, - "version": "f6b942cf9b3a503d59192eada300d2ad97cba82f", + "version": "0f65e8fa5fc72d2678655105c0213b416ca6f34c", "sum": "Mf4h1BYLle2nrgjf/HXrBbl0Zk8N+xaoEM017o0BC+k=", "name": "alertmanager" }, @@ -190,7 +190,7 @@ "subdir": "docs/node-mixin" } }, - "version": "49d177bf95417b117ab612a376e2434d5dd61c2d", + "version": 
"cf8c6891cc610e54f70383addd4bb6079f0add35", "sum": "cQCW+1N0Xae5yXecCWDK2oAlN0luBS/5GrwBYSlaFms=" }, { @@ -200,8 +200,8 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "789c9b1a5e455850ed9b3c89cafb37df75ce1e50", - "sum": "dYLcLzGH4yF3qB7OGC/7z4nqeTNjv42L7Q3BENU8XJI=", + "version": "b407c2930da4f50c0d17fc39404c6302a9eb740b", + "sum": "OYT5u3S8DbamuJV/v3gbWSteOvFzMeNwMj+u4Apk7jM=", "name": "prometheus" }, { @@ -222,7 +222,7 @@ "subdir": "mixin" } }, - "version": "f9da21ec0b28073875520159fe72ab744c255b2e", + "version": "7037331e6ea7dbe85a1b7af37bf8ea277a80663d", "sum": "ieCD4eMgGbOlrI8GmckGPHBGQDcLasE1rULYq56W/bs=", "name": "thanos-mixin" } diff --git a/charts/kubezero-metrics/jsonnet/rules.libsonnet b/charts/kubezero-metrics/jsonnet/rules.libsonnet index 63576cc3..fabe38cf 100644 --- a/charts/kubezero-metrics/jsonnet/rules.libsonnet +++ b/charts/kubezero-metrics/jsonnet/rules.libsonnet @@ -29,14 +29,43 @@ local etcdMixin = addMixin({ }, }); -local kp = (import 'kube-prometheus/main.libsonnet') + - { - values+:: { - common+: { - namespace: 'monitoring', - }, - }, - }; +local kp = (import 'kube-prometheus/main.libsonnet') + { + values+:: { + common+: { + namespace: 'monitoring', + }, + }, + kubernetesControlPlane+: { + prometheusRule+: { + spec+: { + groups: [ + ( + if group.name == 'kubernetes-resources' then + group { + rules: [ + { + alert: 'ClusterAutoscalerNodeGroupsEnabled', + expr: 'cluster_autoscaler_node_groups_count{job="addons-aws-cluster-autoscaler",node_group_type="autoscaled"} > 0 or vector(1)', + 'for': '5m', + labels: { + severity: 'none', + }, + annotations: { + description: 'Inhibitor rule if the Cluster Autoscaler found at least one node group', + summary: 'Cluster Autoscaler found at least one node group.', + }, + }, + ] + super.rules, + } + else + group + ) + for group in super.groups + ], + }, + }, + }, +}; // We just want the Prometheus Rules { 'prometheus-operator-prometheusRule': kp.prometheusOperator.prometheusRule } + diff 
--git a/charts/kubezero-metrics/jsonnet/rules/kubernetes-prometheusRule b/charts/kubezero-metrics/jsonnet/rules/kubernetes-prometheusRule index a081c9c7..0e0e0cc2 100644 --- a/charts/kubezero-metrics/jsonnet/rules/kubernetes-prometheusRule +++ b/charts/kubezero-metrics/jsonnet/rules/kubernetes-prometheusRule @@ -123,7 +123,7 @@ { "alert": "KubeDaemonSetRolloutStuck", "annotations": { - "description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.", + "description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m.", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck", "summary": "DaemonSet rollout is stuck." }, @@ -228,6 +228,18 @@ { "name": "kubernetes-resources", "rules": [ + { + "alert": "ClusterAutoscalerNodeGroupsEnabled", + "annotations": { + "description": "Inhibitor rule if the Cluster Autoscaler found at least one node group", + "summary": "Cluster Autoscaler found at least one node group." + }, + "expr": "cluster_autoscaler_node_groups_count{job=\"addons-aws-cluster-autoscaler\",node_group_type=\"autoscaled\"} > 0 or vector(1)", + "for": "5m", + "labels": { + "severity": "none" + } + }, { "alert": "KubeCPUOvercommit", "annotations": { @@ -506,7 +518,7 @@ { "alert": "KubeClientCertificateExpiration", "annotations": { - "description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.", + "description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days on cluster {{ $labels.cluster }}.", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration", "summary": "Client certificate is about to expire." 
}, @@ -519,7 +531,7 @@ { "alert": "KubeClientCertificateExpiration", "annotations": { - "description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.", + "description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours on cluster {{ $labels.cluster }}.", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration", "summary": "Client certificate is about to expire." }, diff --git a/charts/kubezero-metrics/jsonnet/rules/prometheus-operator-prometheusRule b/charts/kubezero-metrics/jsonnet/rules/prometheus-operator-prometheusRule index e8c6386a..7b086fcd 100644 --- a/charts/kubezero-metrics/jsonnet/rules/prometheus-operator-prometheusRule +++ b/charts/kubezero-metrics/jsonnet/rules/prometheus-operator-prometheusRule @@ -6,7 +6,7 @@ "app.kubernetes.io/component": "controller", "app.kubernetes.io/name": "prometheus-operator", "app.kubernetes.io/part-of": "kube-prometheus", - "app.kubernetes.io/version": "0.78.1", + "app.kubernetes.io/version": "0.78.2", "prometheus": "k8s", "role": "alert-rules" }, diff --git a/charts/kubezero-metrics/jsonnet/rules/prometheus-prometheusRule b/charts/kubezero-metrics/jsonnet/rules/prometheus-prometheusRule index f0bd9ad8..4a2ece62 100644 --- a/charts/kubezero-metrics/jsonnet/rules/prometheus-prometheusRule +++ b/charts/kubezero-metrics/jsonnet/rules/prometheus-prometheusRule @@ -7,7 +7,7 @@ "app.kubernetes.io/instance": "k8s", "app.kubernetes.io/name": "prometheus", "app.kubernetes.io/part-of": "kube-prometheus", - "app.kubernetes.io/version": "2.55.1", + "app.kubernetes.io/version": "3.0.1", "prometheus": "k8s", "role": "alert-rules" }, @@ -74,9 +74,9 @@ { "alert": "PrometheusErrorSendingAlertsToSomeAlertmanagers", "annotations": { - "description": "{{ printf \"%.1f\" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager 
{{$labels.alertmanager}}.", + "description": "{{ printf \"%.1f\" $value }}% of alerts sent by Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}} were affected by errors.", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers", - "summary": "Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager." + "summary": "More than 1% of alerts sent by Prometheus to a specific Alertmanager were affected by errors." }, "expr": "(\n rate(prometheus_notifications_errors_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n/\n rate(prometheus_notifications_sent_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n)\n* 100\n> 1\n", "for": "15m", diff --git a/charts/kubezero-metrics/templates/rules/kubernetes.yaml b/charts/kubezero-metrics/templates/rules/kubernetes.yaml index 20d0a373..a40e75c5 100644 --- a/charts/kubezero-metrics/templates/rules/kubernetes.yaml +++ b/charts/kubezero-metrics/templates/rules/kubernetes.yaml @@ -89,7 +89,7 @@ spec: severity: warning - alert: KubeDaemonSetRolloutStuck annotations: - description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes. + description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck summary: DaemonSet rollout is stuck. 
expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n" @@ -166,6 +166,14 @@ spec: severity: warning - name: kubernetes-resources rules: + - alert: ClusterAutoscalerNodeGroupsEnabled + annotations: + description: Inhibitor rule if the Cluster Autoscaler found at least one node group + summary: Cluster Autoscaler found at least one node group. + expr: cluster_autoscaler_node_groups_count{job="addons-aws-cluster-autoscaler",node_group_type="autoscaled"} > 0 or vector(1) + for: 5m + labels: + severity: none - alert: KubeCPUOvercommit annotations: description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Pods by {{`{{`}} $value {{`}}`}} CPU shares and cannot tolerate node failure. @@ -395,7 +403,7 @@ spec: rules: - alert: KubeClientCertificateExpiration annotations: - description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days. + description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days on cluster {{`{{`}} $labels.cluster {{`}}`}}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration summary: Client certificate is about to expire.
expr: 'histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 @@ -410,7 +418,7 @@ spec: severity: warning - alert: KubeClientCertificateExpiration annotations: - description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours. + description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours on cluster {{`{{`}} $labels.cluster {{`}}`}}. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration summary: Client certificate is about to expire. expr: 'histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 diff --git a/charts/kubezero-metrics/templates/rules/prometheus.yaml b/charts/kubezero-metrics/templates/rules/prometheus.yaml index 9509a2b2..3befaf39 100644 --- a/charts/kubezero-metrics/templates/rules/prometheus.yaml +++ b/charts/kubezero-metrics/templates/rules/prometheus.yaml @@ -57,9 +57,9 @@ spec: severity: warning - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers annotations: - description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.' + description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% of alerts sent by Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}} were affected by errors.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers - summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. 
+ summary: More than 1% of alerts sent by Prometheus to a specific Alertmanager were affected by errors. expr: "(\n rate(prometheus_notifications_errors_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n/\n rate(prometheus_notifications_sent_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n)\n* 100\n> 1\n" for: 15m labels: diff --git a/charts/kubezero-metrics/templates/rules/zdt-inhibitors.yaml b/charts/kubezero-metrics/templates/rules/zdt-inhibitors.yaml deleted file mode 100644 index 30509f18..00000000 --- a/charts/kubezero-metrics/templates/rules/zdt-inhibitors.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kubezero-lib.fullname" $) "zdt-inhibitors" | trunc 63 | trimSuffix "-" }} - namespace: {{ .Release.Namespace }} - labels: - {{- include "kubezero-lib.labels" . | nindent 4 }} -spec: - groups: - - name: zdt-inhibitors - rules: - - alert: ClusterAutoscalerNodeGroupsEnabled - annotations: - description: "This rule is meant to inhibit other rules and should not be forwarded.\nThe Cluster Autoscaler found at least one node group" - summary: Cluster Autoscaler found at least one node group. 
- expr: 'cluster_autoscaler_node_groups_count{job="addons-aws-cluster-autoscaler",node_group_type="autoscaled"} > 0' - for: 15m - labels: - severity: none diff --git a/charts/kubezero-metrics/values.yaml b/charts/kubezero-metrics/values.yaml index a3aa22e9..f249045e 100644 --- a/charts/kubezero-metrics/values.yaml +++ b/charts/kubezero-metrics/values.yaml @@ -223,8 +223,8 @@ kube-prometheus-stack: global: resolve_timeout: 5m route: - group_by: ['severity', 'clusterName'] - group_wait: 10s + group_by: ['alertname', 'severity', 'status'] + group_wait: 30s group_interval: 5m repeat_interval: 4h routes: @@ -252,7 +252,7 @@ kube-prometheus-stack: - alertname = InfoInhibitor target_matchers: - severity = info - # Disable cluster overcommiy alerts if we have cluster autoscaler available + # Disable cluster overcommit alerts if we have cluster autoscaler available - source_matchers: - alertname = ClusterAutoscalerNodeGroupsEnabled target_matchers: diff --git a/charts/kubezero/templates/addons.yaml b/charts/kubezero/templates/addons.yaml index 570e7472..8573b86d 100644 --- a/charts/kubezero/templates/addons.yaml +++ b/charts/kubezero/templates/addons.yaml @@ -6,7 +6,7 @@ clusterBackup: {{- toYaml . | nindent 2 }} {{- end }} - {{- if .Values.global.aws.region }} + {{- if eq .Values.global.platform "aws" }} # AWS extraEnv: - name: AWS_DEFAULT_REGION @@ -20,7 +20,7 @@ forseti: {{- toYaml . | nindent 2 }} {{- end }} - {{- if .Values.global.aws.region }} + {{- if eq .Values.global.platform "aws" }} # AWS aws: region: {{ $.Values.global.aws.region }} @@ -34,7 +34,7 @@ external-dns: {{- toYaml . 
| nindent 2 }} {{- end }} - {{- if .Values.global.aws.region }} + {{- if eq .Values.global.platform "aws" }} # AWS txtOwnerId: {{ .Values.global.clusterName }} provider: aws @@ -67,13 +67,18 @@ external-dns: cluster-autoscaler: enabled: {{ ternary "true" "false" (or (hasKey .Values.global.aws "region") (index .Values "addons" "cluster-autoscaler" "enabled")) }} + autoDiscovery: + clusterName: {{ .Values.global.clusterName }} + + {{- if not .Values.global.highAvailable }} + extraArgs: + leader-elect: false + {{- end }} + {{- with omit (index .Values "addons" "cluster-autoscaler") "enabled" }} {{- toYaml . | nindent 2 }} {{- end }} - autoDiscovery: - clusterName: {{ .Values.global.clusterName }} - {{- with .Values.metrics }} serviceMonitor: enabled: {{ .enabled }} @@ -82,7 +87,7 @@ cluster-autoscaler: # enabled: {{ .enabled }} {{- end }} - {{- if .Values.global.aws.region }} + {{- if eq .Values.global.platform "aws" }} # AWS awsRegion: {{ .Values.global.aws.region }} diff --git a/charts/kubezero/templates/operators.yaml b/charts/kubezero/templates/operators.yaml index c01537af..cd481428 100644 --- a/charts/kubezero/templates/operators.yaml +++ b/charts/kubezero/templates/operators.yaml @@ -20,6 +20,16 @@ cloudnative-pg: {{- end }} {{- end }} +{{- with index .Values "operators" "strimzi-kafka-operator" }} +strimzi-kafka-operator: + {{- toYaml . | nindent 2 }} + + {{- with $.Values.metrics }} + monitoring: + podMonitorEnabled: {{ .enabled }} + {{- end }} +{{- end }} + {{- end }} diff --git a/charts/kubezero/templates/telemetry.yaml b/charts/kubezero/templates/telemetry.yaml index d2ae8259..c47edcef 100644 --- a/charts/kubezero/templates/telemetry.yaml +++ b/charts/kubezero/templates/telemetry.yaml @@ -1,5 +1,8 @@ {{- define "telemetry-values" }} +metrics: + enabled: {{ .Values.metrics.enabled }} + {{- if index .Values "telemetry" "fluent-bit" }} fluent-bit: {{- with index .Values.telemetry "fluent-bit" }}