From c97c9e6d913c0bff4744edaf7924ca2dde11ceb9 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Wed, 29 Sep 2021 16:33:33 +0200 Subject: [PATCH] fix(metrics): More fixes around AlertLabels --- charts/kubezero-metrics/Chart.yaml | 2 +- charts/kubezero-metrics/README.md | 2 +- charts/kubezero-metrics/configs/build.sh | 13 +- .../configs/jsonnetfile.lock.json | 6 +- .../kubezero-metrics/configs/k8s-rules.yaml | 4 +- .../configs/metrics-dashboards.yaml | 2 +- .../configs/zdt-dashboards.yaml | 1 + .../kubezero-metrics/sync_prometheus_rules.py | 12 +- .../templates/grafana-dashboards-metrics.yaml | 2 +- .../templates/grafana-dashboards-zdt.yaml | 2 + .../templates/rules/alertmanager.yaml | 24 -- .../templates/rules/etcd-mixin.yaml | 36 --- .../templates/rules/kube-prometheus.yaml | 9 - .../templates/rules/kube-state-metrics.yaml | 12 - .../templates/rules/kubernetes.yaml | 282 ------------------ .../templates/rules/node-exporter.yaml | 54 ---- .../templates/rules/prometheus-operator.yaml | 21 -- .../templates/rules/prometheus.yaml | 54 ---- charts/kubezero-metrics/values.yaml | 6 +- 19 files changed, 20 insertions(+), 524 deletions(-) diff --git a/charts/kubezero-metrics/Chart.yaml b/charts/kubezero-metrics/Chart.yaml index eeddf7b..00061d4 100644 --- a/charts/kubezero-metrics/Chart.yaml +++ b/charts/kubezero-metrics/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: kubezero-metrics description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations. type: application -version: 0.5.2 +version: 0.5.4 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: diff --git a/charts/kubezero-metrics/README.md b/charts/kubezero-metrics/README.md index 91b1ea1..afe820f 100644 --- a/charts/kubezero-metrics/README.md +++ b/charts/kubezero-metrics/README.md @@ -1,6 +1,6 @@ # kubezero-metrics -![Version: 0.5.2](https://img.shields.io/badge/Version-0.5.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.5.4](https://img.shields.io/badge/Version-0.5.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations. diff --git a/charts/kubezero-metrics/configs/build.sh b/charts/kubezero-metrics/configs/build.sh index 6d3bc97..3869153 100755 --- a/charts/kubezero-metrics/configs/build.sh +++ b/charts/kubezero-metrics/configs/build.sh @@ -1,16 +1,15 @@ #!/bin/bash -ex -JB='./jb-linux-amd64' +which jsonnet > /dev/null || { echo "Required jsonnet not found!"; exit 1;} +which jb > /dev/null || { echo "Required jb ( json-bundler ) not found!"; exit 1;} -which jsonnet > /dev/null || { echo "Required jsonnet not found!"; } -[ -x $JB ] || { wget https://github.com/jsonnet-bundler/jsonnet-bundler/releases/download/v0.4.0/jb-linux-amd64 && chmod +x $JB; } -#which gojsontoyaml || go install github.com/brancz/gojsontoyaml@latest +# wget https://github.com/jsonnet-bundler/jsonnet-bundler/releases/download/v0.4.0/jb-linux-amd64 -[ -r jsonnetfile.json ] || $JB init +[ -r jsonnetfile.json ] || jb init if [ -r jsonnetfile.lock.json ]; then - $JB update + jb update else - $JB install github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus@main + jb install github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus@main fi rm -rf dashboards && mkdir -p dashboards diff --git a/charts/kubezero-metrics/configs/jsonnetfile.lock.json b/charts/kubezero-metrics/configs/jsonnetfile.lock.json index cb8f160..ddce81e 100644 --- a/charts/kubezero-metrics/configs/jsonnetfile.lock.json +++ b/charts/kubezero-metrics/configs/jsonnetfile.lock.json @@ -38,7 +38,7 @@ "subdir": "grafana-builder" } }, - "version": "bc9b685050691a78ee414cd8f789857de0eabe8d", + "version": "b7eae75972a369bf8ebfb03dcb0d4c14464ef85a", "sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8=" }, { @@ -98,7 +98,7 @@ "subdir": "jsonnet/mixin" } }, - "version": "c6b5cc2e178002683ab3afe8d023a8bb2508ba19", + "version": "42fc15967e35e0cca68cf935f844086edbc82d0e", "sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=", "name": "prometheus-operator-mixin" }, @@ -109,7 +109,7 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "c6b5cc2e178002683ab3afe8d023a8bb2508ba19", + "version": "42fc15967e35e0cca68cf935f844086edbc82d0e", "sum": "sECNXs/aIEreFUma1BWVyknBygqh3AVJEB3msmrAYYY=" }, { diff --git a/charts/kubezero-metrics/configs/k8s-rules.yaml b/charts/kubezero-metrics/configs/k8s-rules.yaml index cef2ba5..c919591 100644 --- a/charts/kubezero-metrics/configs/k8s-rules.yaml +++ b/charts/kubezero-metrics/configs/k8s-rules.yaml @@ -2,8 +2,6 @@ rules: - name: alertmanager url: file://rules/alertmanager-prometheusRule -- name: etcd-mixin - url: file://rules/etcd-mixin-prometheusRule - name: kube-prometheus url: file://rules/kube-prometheus-prometheusRule - name: kube-state-metrics @@ -16,3 +14,5 @@ rules: url: file://rules/prometheus-operator-prometheusRule - name: prometheus url: file://rules/prometheus-prometheusRule +- name: etcd-mixin + url: file://rules/etcd-mixin-prometheusRule diff --git a/charts/kubezero-metrics/configs/metrics-dashboards.yaml b/charts/kubezero-metrics/configs/metrics-dashboards.yaml index 7310eec..8d0f685 100644 --- a/charts/kubezero-metrics/configs/metrics-dashboards.yaml +++ b/charts/kubezero-metrics/configs/metrics-dashboards.yaml @@ -1,6 +1,6 @@ configmap: grafana-dashboards-metrics gzip: true -folder: Metrics +folder: KubeZero dashboards: - name: AlertManager url: https://grafana.com/api/dashboards/9578/revisions/4/download diff --git a/charts/kubezero-metrics/configs/zdt-dashboards.yaml b/charts/kubezero-metrics/configs/zdt-dashboards.yaml index 5f45fee..de1c250 100644 --- a/charts/kubezero-metrics/configs/zdt-dashboards.yaml +++ b/charts/kubezero-metrics/configs/zdt-dashboards.yaml @@ -1,5 +1,6 @@ configmap: grafana-dashboards-zdt gzip: true +folder: KubeZero dashboards: - name: home url: file://zdt/home.json diff --git a/charts/kubezero-metrics/sync_prometheus_rules.py b/charts/kubezero-metrics/sync_prometheus_rules.py index fc601b0..92beb09 100755 --- a/charts/kubezero-metrics/sync_prometheus_rules.py +++ b/charts/kubezero-metrics/sync_prometheus_rules.py @@ -61,19 +61,9 @@ for r in config['rules']: # Encode {{ }} for helm text = text.replace("{{", "{{`{{").replace("}}", "}}`}}").replace("{{`{{", "{{`{{`}}").replace("}}`}}", "{{`}}`}}") - text = textwrap.indent(text, ' '*2) - - # add support for additionalRuleLabels from Helm values - helm_labels = '''{{- if .Values.additionalRuleLabels }} -{{- toYaml .Values.additionalRuleLabels | nindent 8 }} -{{- end }} -''' - helm_labels = textwrap.indent(helm_labels, ' '*8) - - text = text.replace("labels:\n", "labels:\n{}".format(helm_labels)) rule += '''spec:\n''' - rule += text+'\n' + rule += textwrap.indent(text, ' '*2)+'\n' if 'condition' in r: rule += '{{- end }}'+'\n' diff --git a/charts/kubezero-metrics/templates/grafana-dashboards-metrics.yaml b/charts/kubezero-metrics/templates/grafana-dashboards-metrics.yaml index 5bbee50..b83cf5a 100644 --- a/charts/kubezero-metrics/templates/grafana-dashboards-metrics.yaml +++ b/charts/kubezero-metrics/templates/grafana-dashboards-metrics.yaml @@ -7,7 +7,7 @@ metadata: grafana_dashboard: "1" {{- include "kubezero-lib.labels" . | nindent 4 }} annotations: - k8s-sidecar-target-directory: Metrics + k8s-sidecar-target-directory: KubeZero binaryData: AlertManager.json.gz: H4sIAAAAAAAC/+2deXPbRpqH/9en6IWzO3JWlkjwTlWmypYniXdzTexJ1WzspUGwSWEMAjQO24zK+9m3uwEQVwMgKYgmoN9WbcYCiKPv52m8DdyeEaJMp4a19j1X+Yb88eYi2OLQ977hULGNbSHkVvyX7fM2a8q2KktHW2iWplxEO4y5dLOlrcTvv8/u+EAd17Atvm942b3sKGLH5wv55daaRU3JxdY30kslNycuVH6NueZpru07Os1eaO3YK+rdUN/NX+1Xyb7EJVnKDkqba1hLk7qe5uUv+VKyb+dUSq/maTOT5i/0Kr05fw3236DKaJZls/the3mdCS6qmIbrbWtQfCtsz8w3TO8FP1P3It6aKAB5xrLfUEvc0zfEc3ya2H5jzCVbDd22rm3TdvgJneVMO+9cELXbZf8ZDC5I93Hy1FGqn8ZpIf9BnprU8VK3ENcX92Zma85cCfd9vsin9C5pop/W4sb1G81aUvecVUOduu6UlbzjTT1jRacuZSmcu7fEsNhWS6ff/t9r5avoj9fK5z/U1ZvH5K+ko+Qza6GZbmFuPZoturNOR5ZBv1FxB6k0uDf2R1GgneRGj65FI1il8k9b/jfd8HqiRDea2m14Jv3Odlaal7jYNo/F/745C3NbmVNXd4y1F9bL51GZEH5DrJ2QOMuDolyxXmhJHcI2OobukoXtEHvmUucD/zVvVOyX9oKwQ4hu+iwJDtGsOVnbrmuwYiJzOpv5S/bjS3HXCp0bXqb8lKVFvRe8ZU0Go3GwhXdKr2zb9Ix1VO2Dxmf5phn8xS6lhQnpDgaDXq8/UkfjTl/sNQ3rnaQ/5mUmKqOtxw2V5XDcnYdbnCX1npma9S5Tz4L8Fnln64k+zNveq/LLYmHohmYSdhF/Ra2gcfBMSuZo4tCwgfBbjrf6jsk33nje2v3m6iruUy8N+4qd2b3S+NlYxgb/CE97Je/OonQb1sK+a8K/N7wf/Jk06akqE7RjVjsskjtklyQvDe/Gn13q9iqR+lRi7z+trz4anpcqrDix4T6i6brtWx5hf96Q+E6JQ03No3OSuY9dku4FpxZpj1vki1/KE/zeZ42fN4g7JvonzTB56xYDkizliU7CZ12BS1byI3YqZcf21+7l0rbZIC0SzLoYf3X16N+C/40z9Im41nGy4MVv19KU/5dtWITtZOnm6f3NsPdNr8MOuTRWV9p6ffXoyrFZgh/FafyGdeOO8enSdpZZZhAkIunSdNs0tbVL59khinWixvxXOyaMYDDLUMRH9rfaT2z4lBmVNvzv7JAd9MW94fbv+O7eSHoMarG+2ixoCY79UV6ouqbf0Fds3LZ9L9HzR6m2nWea/o7XHyufdrH/d830qXyXmyAttu2ROpno/WFyYBX8o/ZGjHvUyQXpMxTqXI4nKQZ6NO/3tZ4WjbZx0is5ho3EurHSRJZtc1tZbMdxNh47iaq11PwlTRfkSvsUpa/bSRbYyrCiHZ0Mckgoxrth4nJjm/MftVlQgoW/+Elz3lGRc7zR5KpEYXUbZKpbr6K2deW1rR//abHO8YNmZivFdtCPC2LF2hlrqa+CutaVbU9XhfjeEwj3gecn8Wzi0U9eogKQcFfiphO3XXAyh+Np+cnU+GS5qsXK/TmrXb+ynkhYaKLwFZ4bYsdPtkBWVtcti+psFFJSv3nFr5zJPEZs3sL4JHQlu/E72/JeGn+KUw46/57Y79D8MWJb4SEi+T+xHqs42xesvfDj+P2lM8gLblz5+eppZoe9PaAk79w1q8KslmSa0sIwzbT3qP0+N58ea/wDlTf8rppq+At+oXxT4aeOlWDRnw8mYyXfBqXNR1AxO9hfWen8DMarkuzaig/HkPMkIE25Oc6nvN+9LVKex+mMjLug0Jgcg7rpn0TN7ztN90RSu6ndrPgFzCtPy4pi26u4mdRGg8bP/mrG6Iphc3Szbn7skHm/aETJ6jdOVj+xt7z62WKo/3bHmhe12crKJ374c9gJaB+WBYOeqAGZXmyX4SQldmIqgrDW72mGFZFZVkHijN3qn+H9xSXhxMUF8deEV4ILYmrsaMaztjYXW4TiGaxkGPVq7Ghf56bNGsWlkhjL4hKYJEtg53FikhkneruNE+qwbEBYs5SHt5Xq/lj22aJNp4CQ58sPVJtTJ7eHj8+pJLCS4zeVvEteKlE3kbtZ19uYZYOPZhqaaBwcgdJ1jtWHhPP/k/3fk59+evL8Ofnhh29Wq2/cTINda9wmLPm5oqZ0Y8zn1FIqh7Htbb3Iz0ZEdBUOP8kMLkKvBGv1Bxdk0A/+v3M5SXdMuxFZ/LsB29sdsb6bn5WdbfRYSfzqzV2yM4FuampHAWxky8CQZlyqQ/zjjbSMXCYH1jJ9nG8ZCWLcvfh+D6cnUXr7ld4HWb4du/D+EfTMD7nwOocUnhAj8ujp/qVnCRwpKL09Su7HeChF6R1Wes9OovQ49QTQYxYXpKJT6e4H34uGRXldVpTpFHWySewWJ2anYt+zy8VAuWcRX11+fbV/Sw0xdKciy0vlrqrMB8/zx+QJ2VuXydf8gcZ5tOUieur1mIiZ7KlJFx4pefS4v4anniqHAs4P83KPQA9084sd8yqVVSw1C2M55WY4DTrEaaiBIr3sBlfrykQfN83Pdk/zjkllnf+JpO26tFlsH2uEtxcpvitE3s/ApOI5muXyO5YEO0TtNNiMeXvZ7Es8dbWm/PGYYYnn46m5l7DbSE2YRFXEsi36QOb+u+qOkzqdEab/Mf1/pOn/7oTHPLEuRHQenfH+c/90oOkdery5f1Yt0igT9i/TFeV9kdu46f/rMJjI5ZXoIcz7t3vQTI2P177jUMsj1nacZFXJ+EADwnQf/Jg42HFMxIiIEbExI2I+QPbII2LQudzyQYN++1oJupzXygVp7pPxVLeJUbLto6Trr1lf4rp0jpEyGCnHu42UPYyUGCmPFjrGugZVHfL4MdFBdA8YLBcTqs7V0xks456n2QNmrgfFoPlA1NI1TMpD3R76kKl2dxsyu/EPMWhi0MSgucOgGfUxbXTMKG0PJAb73pdUDQu63Th+uHxN1c+2ZywMPVxEvvuiKhHPch0N3bfx5WeakxuQxKLtH6m19EQaO6ntVPbzPYfrBJmJUTuZKN5lzw3XMyzdI7zWL51wh+vrN0Rzya+81T33vc0FeWkyhhFPs12b2NYl+cUiFl1qwfzIJ8MlmkP56damtmH0t9BYfZ6nL5hEA0P0N939x9thRbmPisp9HA+rdEkFiiUuxYptaT11X8mXvvOanNuoBygk6Tb5gva/rdbeRv5igP+hjp3fw3o7yakY/0i2OsbyhjV92TsGEl1ycmsQTc+GFU8zlcyO50EUfeaQ4Ke5zaKFF2ITHzDcbCg/3/jRmHupdiyDm9xwnxowlTVlFd/ytGWeldf8KEebG76brDPB9nw7YuU2p45YdKAszOQaU4euaTBA5KLIw33PDYcRSNi+Ei/2CMaSXz5Qh1XlndYdXH0nGkkujEvfjriTSb/f0TKddDJ4JGqCT/5ZgQk6lXUzLIX6u1zu8PckrOn8x4Ar0vt2Hrxdf3XOehaHai5Nj+KpPmEqqlnh09M/vuLvgwkG5DePH5PZhoeGbbuq2kf3oGuIg/hub5M94+dM0NEhkU+75kvQgR47e+Sv4kgrinKnHA3qPElnLCnM2d0pKzWIr+h3AXOnlIBv/40uw9fCZA54eWMsvPwRMhwIBlNO9eSrfDcRL2e/TXbLbHycF3bMnWwPO41Aw7Dmxgdj7mumkkfs+O1HyVcMfeLjcfryM19/FzTbdDCtsgp72sxqh0juMr+WDy7bIeGPN7lb3GifyvrCogXZQdXhEi+L/zXt5TPWfHJ1LRhCcz8PxtDc5ry8FLfek7vPXFvY5AtdMI2EHsT2H+mH7U2nXmZz2kD73He2r3tJdpi8Sc5FJGDncjIRoMr+Qd77muWxDscla7Eec9vj5Fm0U82iwz1ZtKsWwegAMBokZC8UzV75qCwqmXpqHZDytpPD0VSW7b6eg51sQEpPpp4gst4wIbZZH7GaRl3HOcuUC9ZplNDalL+IyNI3UTz+NBhud8W242MtS9L7nRFsT7iV52BzsvDOhMtT+540mnDJ3I/mgjLjJl+cA+ati3nBu6fHu8eYnO6OC8BwvNvk9NOCZ+uNm5UOggR45y0e6Wxnnt8GT0XeXpC3cUjBW0I9PQ/u/fonkbsTefkMy7FdgHm29hWT+T6QXfTAu4qM5a9GuYdpWl5qf3KXaC0dB1WyaKo2DL/dnY/jel10SnUxGA3onSZ206/UqQuSZZFMhQ9gBZmJ1n2E2VpxnZNHrafJsGI+ZwGiwiwiZhG/BICwwYGythiFKwaT+on374Zkwv71dmHw9wu95Qa0tl0j8eTbmpO3rPOwzQ8cUuzMk/GjPPZW+3JiGYNYHt6D5aguFoHFiM7U4Xzfx8pVQBO0j4pH2Y15Sh30B9OofzjgQWzQb3zBiajb26jvOnki+i3TDcfdLv8gAvAIePRwJp1SQ1x9c05qwZxGry+Zcyrp6iUQVwhyZTBXAnS7QZ0M7F6yYnS3H0pxdduhDPG4Z7Pxnb+AN9p1QzXTu7kkvB8jP7C7MNhv1/xTK7KPi3xDXlt/Ja9uGOwFH2ohwXVnlNEfsVjz/ouYKdfpOno4bbBezP5ohRciM42viWG72A2Sj9Q0+bt9xemouC/+/hrXXvCX32nmE/EO4PCrV/wmtp+AWTu2Z7PKcpm6GRGCHrJrh4d1ji/IxxvGIewv3rXpTIpd8jpgL3MT3tPmtXL52nptsQGDrHhG8fdyEZdSEn1L4ePHj5c66819M/4oChtivCuPmpRfe3N5463MR2GmPgnOmxnwMshbWKu3NbufeQ9bBn9lFTyu5J3EtnT9FJV9MJKMmPnbkHJxORsX83ExI5dwchkrF/NyPtUybi5m5wJ+3oGhKzi6jKVLeLqcqau4upqtgwLQbcaRvwfdV6YibM+Z3SO43AziAaQFlAinT3Llk0F/NlnMF5PZ4sny/Se3r+QO2wbblx+XOuxzYQUos4dsB5/m+ViSw2Bu8bKt/P3qxa8gSPcBo8JbTte0IhEoloEqISiUAmkOyF9SF77vJ+jjpmJo2e3ddDty/w6EL6X869SIFox5+RPLiL+8FEpfJik3gFILKDWBtA2ESUqmqMADCl2gzAcKnGAnL8g3MLkfFDhCqSeUuUKxLxQ7Q4E35JOQ9wdpuyjj81JGL+P0ElYv4fVCZs8lrsnpKGybG3nFKnCRUh9JX6h0kudo5A+mf6hMPzyA6fudMaC+pVCfY4RqnA9+8SL39VJ13O1NBmPJb3/l0x4vJHp4XDMYLQb6ZDQaDYdP3n10VHdXM8gdBzOAGcAMYAYwA5gBzABm0Aoz6KoHqcEEagA1aLwauA59MphNxoPxbDHoP3HG7rtPu+qB9FgoAhQBigBFgCJAEaAIUIR2KML4EEXIVC0oAhShqYow7g8G+nCw6I+erFYfzff7KELuWCgCFAGKAEWAIkARWqYIh68sWNqua6y30hB+/JHotm954jO0nNP5elA+pHGM336gVmdlKt54YW34n6zfIGJgu/yi4fL9cjjuIbIG4fIIl28BzuYq9HFoturruMcE2biAMt33HjR7cYe8CN/oLKrFiWVIeY2VvQnw1Nk+MzRL31oHuAfcA+4fwvx/g7F9eAC297tdcDvmtOuY084YICLiAf+Af8A/4B/wD/gH/AP+v0DQeyX9q6B/0H/j6R9B77AAWAAsABYAC4AFwAIergWMD7KAHiwAFtAKC0BcOywAFgALgAXAAmABsIDdLOCQKP9fst92coU8WNsOMrAC70bzyL8Yx7G+kq/CjcYR7g/Zzz/JDzbpwkseemeZGN5lGcC43CVGA6hEnSqR+1bml1cKLAeo2QIi/BANvdIAxpPOoqPmf1H5Va4ajOAIK13ln9iKeFjkzpRn1F5f2foSi2DDPr+Kjw+0hV2yKbiDk8+osmrfSI0Ixm6erKuwEkAloBJQiQf4QKH9kjA8QBL63T4soeWWcOQHDxnvxOIDqAZUA6oB1YBqQDWgGlCNFqiGfIlDpWvgiQRcoz2u0bylDnAOOAecA84B54BzwDngHM1yjvFBzjGEc8A5WuUczVpYAeeAc8A54BxwDjgHnAOrM1ySMgvN8+hq7bn8e2eM12zLYlSU8gvyS9ZLjEXiBOHit+g8X3RJRq9iefcQX2Zou4xgSUbN/vBd0L63fYNgGBiEHI1TuXSybPzS16nrLnyzqlTv1yPSmRWu7z7VPNutGTTSKFJpgk3AJmATeILRGk8YHuAJ/e4IooCnFnU+tRjikxCwDdgGbAO2AduAbcA2YBtttA3pwoxq3cBzCehGe3SjeQszoB3QDmgHtAPaAe2AdkA7Gqcd44O0YwLtgHa0SjuatTYD2gHtgHZAO6Ad0A5oR6u0o/6VGi6DyMS3l1yXdfgumW0Ib73krb9m16RvCbEd8paNP+aU9TN8g3RpOXcalzVFwioS76Qo2zPfnvWLLuLodyq0BQ9L2m4tWMRRs2hcRU388uurQ/XhYtdrXX4ddz/S68VCM1QndNA9/EJBp1dxkUV/PpiMmyZDrr86L2f8qLee8pFhL7h/zIeN85W7FJR4VNYXo9jtbXTtz5/rN6M9ci5qFk3Jve1IvWMONuuTgyHS4JuDcCW4Eh7RPBgLGh5iQWoHGoSHN3U+vOljiQpcCi4Fl4JLwaXgUnApuBRcqmkuJV2AUy1TXcgUZKo1MtW8BTiQKkgVpApSBamCVEGqIFWQqpOSqvFBUqVCqiBVrZKqZi0vglRBqiBVkCpIFaQKUgWpOi2pqmuFlGv8SR/U+qhhuXcNetCulmsX1kfBlGBK+5kSHyagS3fSpSZl4YNwpgB9oEwno0yzjServ1AmKNNxnkO1WYaGB8hQX4UN4SFUrQ+hMn6NZVJQKigVlApKBaWCUkGpoFRQqoavlqp0qj6cCk7VGqfCaim4FdwKbgW3glvBreBWcCu41b0tmqp0qwHcCm7VKrfCoim4FdwKbgW3glvBreBWcKsa3Kr+70u996nPOsmchUkdKz5s7fjWqSyNGqjldjXEGynaLldYGlWzD/2aat/z1n58thx8g16ugniPCbWVxXJXKyjPj2CwOImc+HswbkW31jawFzkNssd7EUD2eGrScGYfHsDsfXUIaMcTkTqfiGQ0ECt4QP4gf5A/yB/kD/IH+YP8j7TQpBr9R0B/oH9r0L95C02gAFAAKAAUAAoABYACQAFqXw9RrQBjKAAUoFUK0Kz1EFAAKAAUAAoABYACtF0BzjKXkzVLjtFxJ7NtAA5LxVki/6Ob44vrTG3t5lqiFKqVDNwJmE6+bikb+i4guhd9iXFb9gE8q+Ptkcqa80+6mp/66oXjrh3oVnzOstfFW6/abiJYPFCzPFzNHZtjeumiY13Xep3Ova+1XmiGWXEns0V3dvidnLziyBf52qxc+erR+XRpu66xjiB//w9MHkz9N8acFjaBA5Tg9vYd3dzHUuk9czCs/cfJxAPyKbw/Up1fz46SX7yJ+g492fwKupAdsuu6CaYZfHBzQbbFQYLiiN0T1nlC1gnjxEOne7Gx4673qPKsfr8Lz8ITnzqf+GTVHQs+YGuwNdgabA22BluDrcHWYGuwNfkanWpdwzu1oGst0rUGfg0G2gZtg7ZB26Bt0DZoG7QN2vagtG18kLb1oG3QtnZpW8M+NANtg7ZB26Bt0DZoG7QN2gZtIw39zs6RV6pVfEO018Fr89rudlipVrON/bwgrDflq+2dDWGH245bZkPjSWfRUVtrQzeG69mMIFbT975meYzszrsXjuYVwGqO7f1g/oDxpW5bc3caIMaun4pssCudIsw+D0sDPAueBc/iMcQR1/pUfe2+j3cq4ClEvU8hMvKDtT7gXfAueBe8C94F74J3yf2ulqgE3gGAF8DbHuBt3moJgC/AF+AL8AX4AnwBvrXFm1eCL77iDfBtGfg2K94c4AvwBfgCfAG+AF9E7B41YldVqyJ2sRoTEbuI2AXPHsyz8tVkLuNaVj3cDL7y79nZa42l9VgL8Orm1ihhx1rGaLGWvfziuXhARlmLKbvzhn8U7+ftx2tJnOXge3wjD4yPye17j2Kuovd+H+vtMLdd89x2D1HMcAA4ABwADgAHgAPAAeAAXzSyu1oCxpAASEB7JACR3ZAByABkADIAGYAMQAYQ7b6PDEwgA5CBdskAot0hA5AByABkADIAGYAMSGXgLHO5uJV+n26PSvyLsAE4LBVnifyPbo6P0qa2dnMtUQr4SgY0BdiridcMZoP0BdD3umeZsg9Afrw9UFlzFEvXcixySOZht1suRRO867HtToQ1DrCYo1tMAN88i1gmnyxpp0pS55+XOa6ciAtPgyrUjEwqqu7NcpM4RSyTgoInLAPhInARuAgeTNT8vvUKBO8POmBwPJeo87nEpI+FCgB5gDxAHiAPkAfIA+QB8nW8SL6S5LsgeZB8a0geqw1A9CB6ED2IHkQPogfRt/AN+ZVEr4LoQfStInosGQDRg+hB9CB6ED2IHkT/EKPiR+XY3+3gw1htx36ExYPU75HUd/yUVRJNv9Tnq+6KqdF9N/x7VdI0gVTxgSpQKuadaw4JH1VNO+PTU5h2rnXaOas0iAkHxYJiQbGgWFAsKBYUe1A8dCXG4u0iwNgWYSwCooGzwFngLHAWOAucbV0wcCXODoCzwNl24SyigYGzwFngLHAWOAucfXCRsGqv4v3QIyAvAmERCAtIPRRSJSjqWtqatWJv6hp/0uls41G3EEC/AHFGt0f47bUDOFNJAm+eEm+K6g/mBHO2IK61iib7A6yrwgxqva9TGCGsFUwKJgWTgknBpGBSRKnuD6WY4wSUtgdKEaQKOAWcAk4Bp4BTwOmJx5xWw+kYcAo4bRWcIuQUcAo4BZwCTgGngNPGR5BOqt6lio+iIYQUIaSAzoOhs2hJ05Y8c6uaXH+145ImckXu8/QHA++NMaeFtRs0LEsS1lOdIhGDhkHDrYhtnVTN1E7AuZiprfftAF0Et4KWQcugZdAyaBm0DFpuTtRtFS4PO8Bl4HJ7cBlht8BmYDOwGdgMbAY2A5sPjAeuxGZEUwCbW4bNCAgGNgObgc3AZmAzsBmRyvcUqdwbVLzrVgVaI1AZgcr7wPC16TOkdIhJFx5ZU4rFcWlOXeo5Qj2JxXFRuf2LNSk6Lyq5JiLn9zb5/prwQ8CaYE2wJqZo640DrqLI/hAYiRnaet/YoCIMGCwKFgWLgkXBomBRRNnuDqM9wChgtDUw2rwgW0ApoBRQCigFlAJKH0gMazWU9gGlgNJWQWmzQlgBpYBSQCmgFFD6MKH0LHO5ZHNkKVfi7WGVd9i9nyVyPbolPkqY2trNtT0pXioZzBFYqSZgMBu6GeCkepYp8fDFsup2blNZcxJIV2zEviYzsatWvKVXxdvLEPyK4FesBKuHo13DpKxW3AM217346vaW3ZNHP39uOEi/DHOc6LZveWS2ISJdxLZA0ydD06dNoiBqTPPuHAdbRZT94QBEiVneWt9UkJEUBMKCS8Gl4FJwKbgUXIqY2F3BdAgwBZi2B0zx5lkAKgAVgApABaACUE8/PrYaUEcAVABquwAV73gFoAJQAagAVAAqALUNEaXjiojSHlZ5IaIUEaV7cScf6nUx1gfoudAM8yGTp2HpDmVjy7kUQac8k1hOT0Xjbdpb/7NlLehuD1S9qC0LN9NAcY6TjzVkVUGz2GbVsyZRfaL0gfRAeiA95pzrD9YdV005jwHrmHKudco5438I1gXyA/mB/EB+ID+QH8gP5L/vOOhK5scrH8D8LWL+5sVBg/3B/mB/sD/YH+wP9gf7H8T+40PYf9QB+4P928X+zQoxB/uD/cH+YH+wP9gf7I/o/eNG76v9quh9BAS1XRAQvV8z0mPVaJJGbwzXsxlFrBh4apbHGOi8e8HEshxPs99dmQZ0sSOpni7yR+lqy1rUdKpAs/hmC0gWs9gHM+rwAEbtj7pgVExi1zuJPUbQOkgXpAvSBemCdEG6IN2jxGpXo64K1AXqtgd18c5qIC+QF8gL5AXyAnkfYohyNfL2gLxA3nYhL96CDeQF8gJ5gbxAXiAvInMlXNzrVETm9vHaDkTmIjIXJHswycp51bW0NWvIHtvyJ53ONl4TvvESY2l0+4TffluoNJUoQOkJQaloHwBTgGkLAm2rkLM/wqdcMBVb71Rsf4JAW4ArwBXgCnAFuAJcAa6HxM1Wk+sA5ApybQ+5Im4WBAuCBcGCYEGwINgWhMFWE+wQBAuCbRfBIgwWBAuCBcGCYEGwINgHEdU6rIpqxWovRLUiqhVgel9gml2IVYSnFyRa3vXta6VzOZmcOLG2bSFWLmEgV6zFArVi3rXekNdh1bTrCDyKadd6p117CHkF1YJqQbWgWlAtqBZUW388bCXW4rNewNoWYS3iYYG3wFvgLfAWeAu8bX2wbCXe4t1YwNuW4S2CZYG3wFvgLfAWeAu8ffCRtH21IpK2h6/iIpIWkbSg1pqpdanvzKunCKffX5PWcSlLk0gSgBRACiDFfGvNQbJVqNkfd4CamG6t9xNdXQTJAlgBrABWACuAFcAKYN0n/rWaWDE5CmJtEbEi/hXkCnIFuYJcQa4g1waHtlaTqwpyBbm2i1wR2gpyBbmCXEGuINd2kOtZ5nL59qjEu8Ja77DbP0tkfHRXfCwxtbWba35SDFUyOCTwU018uDUbURqsqOqdZQo9jCIdbZe9KGtODOm6feohuXOqGytN3LR6R6of3SFWt9uriNXF58nqg3oGG1bAoQjUbW2g7tWPxsrwyOXXV2Xofd0dXHdH+V9s+5X8IB/3DZ2GgThDO3q+dmw2trhTfe1H3D0VzbWQvv/4ajqNWPnN44NZfOnY/vrZJne7slsORxNHW7nS34v9idtSJL94cyE7aTSUCuLL/eDzRT23xruc/e9JVKv8PZ1VnGMrMvwcSh2ac7utC58l7mU7rEN7tnllCD5Wnr68zv9mbZuGzsuaDbALzTe9UkeS7HPZMd/tVrGCnkxaGLLiuZUXWUWRbns26V5puaZKlppz2aGfLw65w6rLrahmSa+W2/amsnKx8TAnNNXkH3U5jCvO3/kzOl3b8ynrbDyN9WbOlJWvILOpyTvpoDPSbbb1lv0u2/88JrMNOWc7Du97DmgDwfBxy29I1gi2tfdZAwz/+td/EN9lA/GVS1iO3Yfaq1D7Q9T+tLUYev8AFqV+AQMeHmDA/TG++9JqAz72M63MjArWD8Cj4dHwaHg0PBoeDY+GR8Oj4dHw6FP2aOnKpmqR7kOkIdItEenmLWuCUEOoIdQQagg1hBpCDaGGUEOoIdQnItTjg4QasdkQ6jYJdbNWW0KoIdQQagg1hBpCDaGGUEOoIdQQ6lo+P3SXJc1qp2pJ8wja3GZtxpJmiO59i27kuAy4WGZa3nRFV7azmc42Hv1S7xOqUc5IdNOklEz/ptwnsqey9OSonbALl+dOE97B9JPI4wDdAe6nBO6i2gPcAe6tBPfhAeDeHw8B7njeVePzrhFWYgL/gf/Af+A/8B/4D/wH/n/BBWTV/I+Je/B/W/gfC8jgAfAAeAA8AB4AD4AHPEwPGB/kAWN4ADygRR6AdS/wAHgAPAAeAA+AB+CDbGFr/C3sAUu/yHYWHso7Gt5v8L29TnAIQ5kbutJ+p44bQtE42OxtgivMNedd8MvtIqzgL7pam4ykrOU2yYwo3eQSuMR33WKcvk2PoktZRxRCkGzVXow50V6ZLkXDX/JLcYalm/6cPjUlpL2tXAnBIQnzSfxyxa5pSM4QdgOK/Ch77Uk6XUV8hjWkg7xXJcqrm9q6pJ8yQ7fivjPW/3DMlxtLl9xcVCcSN5f9ZF6isDTT/D3K5eQ1EoWYzOsdDJEXlmEZEZmLZE+D9ZXnqW+wznzDnE8Na2Enh+jK4kx3sXFpvsgPTdsCzBwTlZ9kNKssvb3TExesKivYq8uvt0D4Wjn/439fK2/+8/FrJc3TFWUejqXJmsOamyhY9+/RnYucmgbDUdGdX2zZLpUGSdsV2/Y8N893wejpk4cVNsjixA7fpa+CCyd9OOgh+Vj6OeicgmXBYbe0CGhIseyPT7oRDTBACbcpqcPWBuMAJz44LKrtKu/kkKwMEv1ut5P4o5f8o7uK/z1I/Lub/KPXSe6JmUVRE//uhut330Rp4KiaqJ2VV0meeJg8cfIqaj/5R6z9ymievN/oXlLZ96dtxf1GPE49TRR/sMsXEzUKpdqTydQ1wjHmw3YcUkdnn/8feEUcaIKyBAA= diff --git a/charts/kubezero-metrics/templates/grafana-dashboards-zdt.yaml b/charts/kubezero-metrics/templates/grafana-dashboards-zdt.yaml index 7f44164..5cfc4f4 100644 --- a/charts/kubezero-metrics/templates/grafana-dashboards-zdt.yaml +++ b/charts/kubezero-metrics/templates/grafana-dashboards-zdt.yaml @@ -6,6 +6,8 @@ metadata: labels: grafana_dashboard: "1" {{- include "kubezero-lib.labels" . | nindent 4 }} + annotations: + k8s-sidecar-target-directory: KubeZero binaryData: home.json.gz: H4sIAAAAAAAC/9VVS2/UMBC+8yuMDwikdnezj1b0VrVCVLwKFJBoq8obTxJrHTvYTndb1P+O7byczULFDS6J55vxeB6fxz+fIISJENIQw6TQ+Aj9tJAFOdPGSpdeQjXqNcuScXMmrDLa61BKDNGyVDFYBT5XMgeTQalxYAOCLLnTG1VCgGeM7kBZLMWJ5FI5hypdkueTPTSNIvtZLPZQ9CJ0LUjuDz7uckHP0DEHZXohmLvC21Gis6UkiuJa9+D/1/b74MwxUGa2osWpAHNGLSJKzitEkSK7kJIbVlh84kHmTKYv/ZozsXJVvbz2YkEEcN3Wtalqv3qNd69JGHB6IkXC0rY51RZISMmNb9lDkKG8BaVsQatTm+xaf6nVnUvd95VZcRb4WLsE5gGwaZKr5TsnD3z7xA9aURZ9TnnQNtWAcNTCTy+/n16gtzKV188zYwp9NB7HVIzuQcl9KtfCsBxGtuZjojUYPV6VS3DKMbd79nVOON8/mI8KkaIrfE/NFX5xJb4Bjy35kJHojbX/bu3ROzCKxRo9RW0cQTK59PTDOVErdyweJFbwMmXiKyhtE3Kmh6PFaNb6wC7QV5by281z+OeMJWaoMJ5a+LUNNfBTs9PAxlRR1DH8S0SJFltMiaaPMGW2mykt47Zuye/Jk0hOQfkrGJ6Rk82ZgdwZz0L8RwnKnY/DbutMrl8DoUykejh0nPYTxJah/O4rgzXQ3TafgajYVSMhXA+Uhii1a6ch6W+q/SjDmp1DCp02o0wPieTGnJ/k/y+ZesAf2DT/ezYB0C+Kuzo142c4epigsBltcj6cCm0H3hID2qD3sNYosWMA+aFzat2gC7brfgtrWbfEfqunQUGiQLua4NmkaiXWcQY56VgxPaxgc8fr9qpVZdnjh50fecHtKyjS4YPePXEux06fVPMLC7nej7I6ZmxkjeHetoLFK1DdZvuCUxA9wjcJ3TA78NUtCZ49q1wEZI0mgTALhSjv1otgHYXCbBJqsm49DdZR/dBfN3nZJG46Vlw+ekro+CB0HJ4ynYcC7daHNIx3O5Zm6neF7RX7XgqvXyq51lZZK2ryOa45qjmmofY5waW/FPjg4wdIFy/f3VfobUul+ZOHX/6tTqj9CQAA diff --git a/charts/kubezero-metrics/templates/rules/alertmanager.yaml b/charts/kubezero-metrics/templates/rules/alertmanager.yaml index 8b52dfe..da5d6b8 100644 --- a/charts/kubezero-metrics/templates/rules/alertmanager.yaml +++ b/charts/kubezero-metrics/templates/rules/alertmanager.yaml @@ -23,9 +23,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: AlertmanagerMembersInconsistent annotations: @@ -35,9 +32,6 @@ spec: expr: "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n max_over_time(alertmanager_cluster_members{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n< on (namespace,service) group_left\n count by (namespace,service) (max_over_time(alertmanager_cluster_members{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m]))\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: AlertmanagerFailedToSendAlerts annotations: @@ -47,9 +41,6 @@ spec: expr: "(\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m])\n)\n> 0.01\n" for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: AlertmanagerClusterFailedToSendAlerts annotations: @@ -59,9 +50,6 @@ spec: expr: "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration=~`.*`}[5m])\n)\n> 0.01\n" for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: AlertmanagerClusterFailedToSendAlerts annotations: @@ -71,9 +59,6 @@ spec: expr: "min by (namespace,service, integration) (\n rate(alertmanager_notifications_failed_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n/\n rate(alertmanager_notifications_total{job=\"alertmanager-main\",namespace=\"monitoring\", integration!~`.*`}[5m])\n)\n> 0.01\n" for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: AlertmanagerConfigInconsistent annotations: @@ -83,9 +68,6 @@ spec: expr: "count by (namespace,service) (\n count_values by (namespace,service) (\"config_hash\", alertmanager_config_hash{job=\"alertmanager-main\",namespace=\"monitoring\"})\n)\n!= 1\n" for: 20m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: AlertmanagerClusterDown annotations: @@ -95,9 +77,6 @@ spec: expr: "(\n count by (namespace,service) (\n avg_over_time(up{job=\"alertmanager-main\",namespace=\"monitoring\"}[5m]) < 0.5\n )\n/\n count by (namespace,service) (\n up{job=\"alertmanager-main\",namespace=\"monitoring\"}\n )\n)\n>= 0.5\n" for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: AlertmanagerClusterCrashlooping annotations: @@ -107,8 +86,5 @@ spec: expr: "(\n count by (namespace,service) (\n changes(process_start_time_seconds{job=\"alertmanager-main\",namespace=\"monitoring\"}[10m]) > 4\n )\n/\n count by (namespace,service) (\n up{job=\"alertmanager-main\",namespace=\"monitoring\"}\n )\n)\n>= 0.5\n" for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical diff --git a/charts/kubezero-metrics/templates/rules/etcd-mixin.yaml b/charts/kubezero-metrics/templates/rules/etcd-mixin.yaml index 0423fb3..dc9e87c 100644 --- a/charts/kubezero-metrics/templates/rules/etcd-mixin.yaml +++ b/charts/kubezero-metrics/templates/rules/etcd-mixin.yaml @@ -16,9 +16,6 @@ spec: expr: "max without (endpoint) (\n sum without (instance, pod) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count without (To) (\n sum without (instance, pod) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s])) > 0.01\n )\n)\n> 0\n" for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: etcdInsufficientMembers annotations: @@ -29,9 +26,6 @@ spec: ' for: 3m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: etcdNoLeader annotations: @@ -42,9 +36,6 @@ spec: ' for: 1m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: etcdHighNumberOfLeaderChanges annotations: @@ -55,9 +46,6 @@ spec: ' for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: etcdGRPCRequestsSlow annotations: @@ -70,9 +58,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: etcdMemberCommunicationSlow annotations: @@ -85,9 +70,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: etcdHighNumberOfFailedProposals annotations: @@ -98,9 +80,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: etcdHighFsyncDurations annotations: @@ -113,9 +92,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: etcdHighFsyncDurations annotations: @@ -127,9 +103,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: etcdHighCommitDurations annotations: @@ -142,9 +115,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: etcdBackendQuotaLowSpace annotations: @@ -154,9 +124,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: etcdExcessiveDatabaseGrowth annotations: @@ -166,8 +133,5 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning diff --git a/charts/kubezero-metrics/templates/rules/kube-prometheus.yaml b/charts/kubezero-metrics/templates/rules/kube-prometheus.yaml index b86ed68..96747eb 100644 --- a/charts/kubezero-metrics/templates/rules/kube-prometheus.yaml +++ b/charts/kubezero-metrics/templates/rules/kube-prometheus.yaml @@ -17,9 +17,6 @@ spec: expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10 for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: Watchdog annotations: @@ -38,9 +35,6 @@ spec: summary: An alert that should always be firing to certify that Alertmanager is working properly. expr: vector(1) labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: none - name: node-network rules: @@ -54,9 +48,6 @@ spec: ' for: 2m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - name: kube-prometheus-node-recording.rules rules: diff --git a/charts/kubezero-metrics/templates/rules/kube-state-metrics.yaml b/charts/kubezero-metrics/templates/rules/kube-state-metrics.yaml index 92f0727..81a8a61 100644 --- a/charts/kubezero-metrics/templates/rules/kube-state-metrics.yaml +++ b/charts/kubezero-metrics/templates/rules/kube-state-metrics.yaml @@ -17,9 +17,6 @@ spec: expr: "(sum(rate(kube_state_metrics_list_total{job=\"kube-state-metrics\",result=\"error\"}[5m]))\n /\nsum(rate(kube_state_metrics_list_total{job=\"kube-state-metrics\"}[5m])))\n> 0.01\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: KubeStateMetricsWatchErrors annotations: @@ -29,9 +26,6 @@ spec: expr: "(sum(rate(kube_state_metrics_watch_total{job=\"kube-state-metrics\",result=\"error\"}[5m]))\n /\nsum(rate(kube_state_metrics_watch_total{job=\"kube-state-metrics\"}[5m])))\n> 0.01\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: KubeStateMetricsShardingMismatch annotations: @@ -43,9 +37,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: KubeStateMetricsShardsMissing annotations: @@ -55,8 +46,5 @@ spec: expr: "2^max(kube_state_metrics_total_shards{job=\"kube-state-metrics\"}) - 1\n -\nsum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job=\"kube-state-metrics\"}) )\n!= 0\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical diff --git a/charts/kubezero-metrics/templates/rules/kubernetes.yaml b/charts/kubezero-metrics/templates/rules/kubernetes.yaml index dfc3593..953ff5d 100644 --- a/charts/kubezero-metrics/templates/rules/kubernetes.yaml +++ b/charts/kubezero-metrics/templates/rules/kubernetes.yaml @@ -23,9 +23,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubePodNotReady annotations: @@ -35,9 +32,6 @@ spec: expr: "sum by (namespace, pod) (\n max by(namespace, pod) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (\n 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeDeploymentGenerationMismatch annotations: @@ -47,9 +41,6 @@ spec: expr: "kube_deployment_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_deployment_metadata_generation{job=\"kube-state-metrics\"}\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeDeploymentReplicasMismatch annotations: @@ -59,9 +50,6 @@ spec: expr: "(\n kube_deployment_spec_replicas{job=\"kube-state-metrics\"}\n >\n kube_deployment_status_replicas_available{job=\"kube-state-metrics\"}\n) and (\n changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\"}[10m])\n ==\n 0\n)\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeStatefulSetReplicasMismatch annotations: @@ -71,9 +59,6 @@ spec: expr: "(\n kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas{job=\"kube-state-metrics\"}\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[10m])\n ==\n 0\n)\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeStatefulSetGenerationMismatch annotations: @@ -83,9 +68,6 @@ spec: expr: "kube_statefulset_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_statefulset_metadata_generation{job=\"kube-state-metrics\"}\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeStatefulSetUpdateNotRolledOut annotations: @@ -95,9 +77,6 @@ spec: expr: "(\n max without (revision) (\n kube_statefulset_status_current_revision{job=\"kube-state-metrics\"}\n unless\n kube_statefulset_status_update_revision{job=\"kube-state-metrics\"}\n )\n *\n (\n kube_statefulset_replicas{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeDaemonSetRolloutStuck annotations: @@ -107,9 +86,6 @@ spec: expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeContainerWaiting annotations: @@ -121,9 +97,6 @@ spec: ' for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeDaemonSetNotScheduled annotations: @@ -133,9 +106,6 @@ spec: expr: "kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n -\nkube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"} > 0\n" for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeDaemonSetMisScheduled annotations: @@ -147,9 +117,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeJobCompletion annotations: @@ -161,9 +128,6 @@ spec: ' for: 12h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeJobFailed annotations: @@ -175,9 +139,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeHpaReplicasMismatch annotations: @@ -187,9 +148,6 @@ spec: expr: "(kube_horizontalpodautoscaler_status_desired_replicas{job=\"kube-state-metrics\"}\n !=\nkube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"})\n and\n(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n >\nkube_horizontalpodautoscaler_spec_min_replicas{job=\"kube-state-metrics\"})\n and\n(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n <\nkube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"})\n and\nchanges(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}[15m]) == 0\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeHpaMaxedOut annotations: @@ -199,9 +157,6 @@ spec: expr: "kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n ==\nkube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"}\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - name: kubernetes-resources rules: @@ -219,9 +174,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeMemoryOvercommit annotations: @@ -237,9 +189,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeCPUQuotaOvercommit annotations: @@ -249,9 +198,6 @@ spec: expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n" for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeMemoryQuotaOvercommit annotations: @@ -261,9 +207,6 @@ spec: expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n" for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeQuotaAlmostFull annotations: @@ -273,9 +216,6 @@ spec: expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 0.9 < 1\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: info - alert: KubeQuotaFullyUsed annotations: @@ -285,9 +225,6 @@ spec: expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n == 1\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: info - alert: KubeQuotaExceeded annotations: @@ -297,9 +234,6 @@ spec: expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 1\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: CPUThrottlingHigh annotations: @@ -309,9 +243,6 @@ spec: expr: "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", }[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)\n > ( 25 / 100 )\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: info - name: kubernetes-storage rules: @@ -323,9 +254,6 @@ spec: expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\n" for: 1m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: KubePersistentVolumeFillingUp annotations: @@ -335,9 +263,6 @@ spec: expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\n" for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubePersistentVolumeErrors annotations: @@ -349,9 +274,6 @@ spec: ' for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - name: kubernetes-system rules: @@ -365,9 +287,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeClientErrors annotations: @@ -377,9 +296,6 @@ spec: expr: "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (instance, job, namespace))\n> 0.01\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - name: kube-apiserver-slos rules: @@ -397,9 +313,6 @@ spec: ' for: 2m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} long: 1h severity: critical short: 5m @@ -417,9 +330,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} long: 6h severity: critical short: 30m @@ -437,9 +347,6 @@ spec: ' for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} long: 1d severity: warning short: 2h @@ -457,9 +364,6 @@ spec: ' for: 3h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} long: 3d severity: warning short: 6h @@ -474,9 +378,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeClientCertificateExpiration annotations: @@ -487,9 +388,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: AggregatedAPIErrors annotations: @@ -500,9 +398,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: AggregatedAPIDown annotations: @@ -514,9 +409,6 @@ spec: ' for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeAPIDown annotations: @@ -528,9 +420,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: KubeAPITerminatedRequests annotations: @@ -542,9 +431,6 @@ spec: ' for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - name: kubernetes-system-kubelet rules: @@ -558,9 +444,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeNodeUnreachable annotations: @@ -572,9 +455,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeletTooManyPods annotations: @@ -584,9 +464,6 @@ spec: expr: "count by(node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: info - alert: KubeNodeReadinessFlapping annotations: @@ -598,9 +475,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeletPlegDurationHigh annotations: @@ -612,9 +486,6 @@ spec: ' for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeletPodStartUpLatencyHigh annotations: @@ -626,9 +497,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeletClientCertificateExpiration annotations: @@ -639,9 +507,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeletClientCertificateExpiration annotations: @@ -652,9 +517,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: KubeletServerCertificateExpiration annotations: @@ -665,9 +527,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeletServerCertificateExpiration annotations: @@ -678,9 +537,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: KubeletClientCertificateRenewalErrors annotations: @@ -692,9 +548,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeletServerCertificateRenewalErrors annotations: @@ -706,9 +559,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: KubeletDown annotations: @@ -720,9 +570,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - name: kubernetes-system-scheduler rules: @@ -736,9 +583,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - name: kubernetes-system-controller-manager rules: @@ -752,108 +596,63 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - name: kube-apiserver-burnrate.rules rules: - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: apiserver_request:burnrate1d - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: apiserver_request:burnrate1h - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: apiserver_request:burnrate2h - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: apiserver_request:burnrate30m - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: apiserver_request:burnrate3d - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: apiserver_request:burnrate5m - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: apiserver_request:burnrate6h - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: apiserver_request:burnrate1d - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: apiserver_request:burnrate1h - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: apiserver_request:burnrate2h - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: apiserver_request:burnrate30m - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: apiserver_request:burnrate3d - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: apiserver_request:burnrate5m - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: apiserver_request:burnrate6h - name: kube-apiserver-histogram.rules @@ -862,9 +661,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.99' verb: read record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile @@ -872,9 +668,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.99' verb: write record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile @@ -882,27 +675,18 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.99' record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.9' record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.5' record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - interval: 3m @@ -916,18 +700,12 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: code:apiserver_request_total:increase30d - expr: 'sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: code:apiserver_request_total:increase30d - expr: 'sum by (cluster, verb, scope) (increase(apiserver_request_duration_seconds_count[1h])) @@ -949,41 +727,26 @@ spec: - expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"\ })\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: all record: apiserver_request:availability30d - expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: apiserver_request:availability30d - expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: apiserver_request:availability30d - expr: 'sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: read record: code_resource:apiserver_request_total:rate5m - expr: 'sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} verb: write record: code_resource:apiserver_request_total:rate5m - expr: 'sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) @@ -1032,23 +795,14 @@ spec: record: namespace_cpu:kube_pod_container_resource_limits:sum - expr: "max by (cluster, namespace, workload, pod) (\n label_replace(\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"},\n \"replicaset\", \"$1\", \"owner_name\", \"(.*)\"\n ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (\n 1, max by (replicaset, namespace, owner_name) (\n kube_replicaset_owner{job=\"kube-state-metrics\"}\n )\n ),\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} workload_type: deployment record: namespace_workload_pod:kube_pod_owner:relabel - expr: "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"DaemonSet\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} workload_type: daemonset record: namespace_workload_pod:kube_pod_owner:relabel - expr: "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"StatefulSet\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n" labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} workload_type: statefulset record: namespace_workload_pod:kube_pod_owner:relabel - name: kube-scheduler.rules @@ -1057,81 +811,54 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.99' record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.99' record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.99' record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.9' record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.9' record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.9' record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.5' record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.5' record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.5' record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - name: node.rules @@ -1148,27 +875,18 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.99' record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.9' record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - expr: 'histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} quantile: '0.5' record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile diff --git a/charts/kubezero-metrics/templates/rules/node-exporter.yaml b/charts/kubezero-metrics/templates/rules/node-exporter.yaml index 9b56db1..89d2ca3 100644 --- a/charts/kubezero-metrics/templates/rules/node-exporter.yaml +++ b/charts/kubezero-metrics/templates/rules/node-exporter.yaml @@ -17,9 +17,6 @@ spec: expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 20\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeFilesystemSpaceFillingUp annotations: @@ -29,9 +26,6 @@ spec: expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: NodeFilesystemAlmostOutOfSpace annotations: @@ -41,9 +35,6 @@ spec: expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 5\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" for: 30m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeFilesystemAlmostOutOfSpace annotations: @@ -53,9 +44,6 @@ spec: expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 3\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" for: 30m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: NodeFilesystemFilesFillingUp annotations: @@ -65,9 +53,6 @@ spec: expr: "(\n node_filesystem_files_free{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_files{job=\"node-exporter\",fstype!=\"\"} * 100 < 40\nand\n predict_linear(node_filesystem_files_free{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeFilesystemFilesFillingUp annotations: @@ -77,9 +62,6 @@ spec: expr: "(\n node_filesystem_files_free{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_files{job=\"node-exporter\",fstype!=\"\"} * 100 < 20\nand\n predict_linear(node_filesystem_files_free{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: NodeFilesystemAlmostOutOfFiles annotations: @@ -89,9 +71,6 @@ spec: expr: "(\n node_filesystem_files_free{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_files{job=\"node-exporter\",fstype!=\"\"} * 100 < 5\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeFilesystemAlmostOutOfFiles annotations: @@ -101,9 +80,6 @@ spec: expr: "(\n node_filesystem_files_free{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_files{job=\"node-exporter\",fstype!=\"\"} * 100 < 3\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: NodeNetworkReceiveErrs annotations: @@ -115,9 +91,6 @@ spec: ' for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeNetworkTransmitErrs annotations: @@ -129,9 +102,6 @@ spec: ' for: 1h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeHighNumberConntrackEntriesUsed annotations: @@ -142,9 +112,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeTextFileCollectorScrapeError annotations: @@ -155,9 +122,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeClockSkewDetected annotations: @@ -167,9 +131,6 @@ spec: expr: "(\n node_timex_offset_seconds > 0.05\nand\n deriv(node_timex_offset_seconds[5m]) >= 0\n)\nor\n(\n node_timex_offset_seconds < -0.05\nand\n deriv(node_timex_offset_seconds[5m]) <= 0\n)\n" for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeClockNotSynchronising annotations: @@ -185,9 +146,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeRAIDDegraded annotations: @@ -199,9 +157,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: NodeRAIDDiskFailure annotations: @@ -212,9 +167,6 @@ spec: ' labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeFileDescriptorLimit annotations: @@ -224,9 +176,6 @@ spec: expr: "(\n node_filefd_allocated{job=\"node-exporter\"} * 100 / node_filefd_maximum{job=\"node-exporter\"} > 70\n)\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: NodeFileDescriptorLimit annotations: @@ -236,9 +185,6 @@ spec: expr: "(\n node_filefd_allocated{job=\"node-exporter\"} * 100 / node_filefd_maximum{job=\"node-exporter\"} > 90\n)\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - name: node-exporter.rules rules: diff --git a/charts/kubezero-metrics/templates/rules/prometheus-operator.yaml b/charts/kubezero-metrics/templates/rules/prometheus-operator.yaml index 56193e6..f8d3f77 100644 --- a/charts/kubezero-metrics/templates/rules/prometheus-operator.yaml +++ b/charts/kubezero-metrics/templates/rules/prometheus-operator.yaml @@ -19,9 +19,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusOperatorWatchErrors annotations: @@ -33,9 +30,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusOperatorSyncFailed annotations: @@ -47,9 +41,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusOperatorReconcileErrors annotations: @@ -61,9 +52,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusOperatorNodeLookupErrors annotations: @@ -75,9 +63,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusOperatorNotReady annotations: @@ -89,9 +74,6 @@ spec: ' for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusOperatorRejectedResources annotations: @@ -103,8 +85,5 @@ spec: ' for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning diff --git a/charts/kubezero-metrics/templates/rules/prometheus.yaml b/charts/kubezero-metrics/templates/rules/prometheus.yaml index 7215ea2..3cbc414 100644 --- a/charts/kubezero-metrics/templates/rules/prometheus.yaml +++ b/charts/kubezero-metrics/templates/rules/prometheus.yaml @@ -23,9 +23,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: PrometheusNotificationQueueRunningFull annotations: @@ -35,9 +32,6 @@ spec: expr: "# Without min_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n predict_linear(prometheus_notifications_queue_length{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m], 60 * 30)\n>\n min_over_time(prometheus_notifications_queue_capacity{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n)\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers annotations: @@ -47,9 +41,6 @@ spec: expr: "(\n rate(prometheus_notifications_errors_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n/\n rate(prometheus_notifications_sent_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n)\n* 100\n> 1\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusNotConnectedToAlertmanagers annotations: @@ -65,9 +56,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusTSDBReloadsFailing annotations: @@ -79,9 +67,6 @@ spec: ' for: 4h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusTSDBCompactionsFailing annotations: @@ -93,9 +78,6 @@ spec: ' for: 4h labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusNotIngestingSamples annotations: @@ -105,9 +87,6 @@ spec: expr: "(\n rate(prometheus_tsdb_head_samples_appended_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]) <= 0\nand\n (\n sum without(scrape_job) (prometheus_target_metadata_cache_entries{job=\"prometheus-k8s\",namespace=\"monitoring\"}) > 0\n or\n sum without(rule_group) (prometheus_rule_group_rules{job=\"prometheus-k8s\",namespace=\"monitoring\"}) > 0\n )\n)\n" for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusDuplicateTimestamps annotations: @@ -119,9 +98,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusOutOfOrderTimestamps annotations: @@ -133,9 +109,6 @@ spec: ' for: 10m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusRemoteStorageFailures annotations: @@ -145,9 +118,6 @@ spec: expr: "(\n (rate(prometheus_remote_storage_failed_samples_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]))\n/\n (\n (rate(prometheus_remote_storage_failed_samples_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]))\n +\n (rate(prometheus_remote_storage_succeeded_samples_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]))\n )\n)\n* 100\n> 1\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: PrometheusRemoteWriteBehind annotations: @@ -157,9 +127,6 @@ spec: expr: "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n- ignoring(remote_name, url) group_right\n max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n)\n> 120\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: PrometheusRemoteWriteDesiredShards annotations: @@ -169,9 +136,6 @@ spec: expr: "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n max_over_time(prometheus_remote_storage_shards_desired{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n>\n max_over_time(prometheus_remote_storage_shards_max{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m])\n)\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusRuleFailures annotations: @@ -183,9 +147,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: PrometheusMissingRuleEvaluations annotations: @@ -197,9 +158,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusTargetLimitHit annotations: @@ -211,9 +169,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusLabelLimitHit annotations: @@ -225,9 +180,6 @@ spec: ' for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: warning - alert: PrometheusTargetSyncFailure annotations: @@ -239,9 +191,6 @@ spec: ' for: 5m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical - alert: PrometheusErrorSendingAlertsToAnyAlertmanager annotations: @@ -251,8 +200,5 @@ spec: expr: "min without (alertmanager) (\n rate(prometheus_notifications_errors_total{job=\"prometheus-k8s\",namespace=\"monitoring\",alertmanager!~``}[5m])\n/\n rate(prometheus_notifications_sent_total{job=\"prometheus-k8s\",namespace=\"monitoring\",alertmanager!~``}[5m])\n)\n* 100\n> 3\n" for: 15m labels: - {{- if .Values.additionalRuleLabels }} - {{- toYaml .Values.additionalRuleLabels | nindent 8 }} - {{- end }} severity: critical diff --git a/charts/kubezero-metrics/values.yaml b/charts/kubezero-metrics/values.yaml index 9e07d60..eef269a 100644 --- a/charts/kubezero-metrics/values.yaml +++ b/charts/kubezero-metrics/values.yaml @@ -1,7 +1,3 @@ -#additionalRuleLabels: -# clusterName: myTestCluster -# awsRegion: eu-central-1 - kube-prometheus-stack: global: rbac: @@ -105,7 +101,7 @@ kube-prometheus-stack: logFormat: json # externalUrl: - # gather all monitors + # gather ALL monitors podMonitorSelectorNilUsesHelmValues: false serviceMonitorSelectorNilUsesHelmValues: false ruleSelectorNilUsesHelmValues: false