fix(metrics): Actually update templates

This commit is contained in:
Stefan Reimer 2021-09-28 16:06:36 +02:00
parent 148cd40ca0
commit add0ba1173
4 changed files with 3 additions and 25 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-metrics name: kubezero-metrics
description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations. description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
type: application type: application
version: 0.5.1 version: 0.5.2
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords: keywords:

View File

@ -1,6 +1,6 @@
# kubezero-metrics # kubezero-metrics
![Version: 0.5.1](https://img.shields.io/badge/Version-0.5.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![Version: 0.5.2](https://img.shields.io/badge/Version-0.5.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations. KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.

View File

@ -38,7 +38,7 @@
"subdir": "grafana-builder" "subdir": "grafana-builder"
} }
}, },
"version": "b9941184e4c9987077898e788343e21a800d8465", "version": "bc9b685050691a78ee414cd8f789857de0eabe8d",
"sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8=" "sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
}, },
{ {

View File

@ -59,28 +59,6 @@ spec:
{{- toYaml .Values.additionalRuleLabels | nindent 8 }} {{- toYaml .Values.additionalRuleLabels | nindent 8 }}
{{- end }} {{- end }}
severity: warning severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
summary: etcd cluster has high number of failed grpc requests.
expr: "100 * sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"}[5m])) without (grpc_type, grpc_code)\n /\nsum(rate(grpc_server_handled_total{job=~\".*etcd.*\"}[5m])) without (grpc_type, grpc_code)\n > 1\n"
for: 10m
labels:
{{- if .Values.additionalRuleLabels }}
{{- toYaml .Values.additionalRuleLabels | nindent 8 }}
{{- end }}
severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
summary: etcd cluster has high number of failed grpc requests.
expr: "100 * sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"}[5m])) without (grpc_type, grpc_code)\n /\nsum(rate(grpc_server_handled_total{job=~\".*etcd.*\"}[5m])) without (grpc_type, grpc_code)\n > 5\n"
for: 5m
labels:
{{- if .Values.additionalRuleLabels }}
{{- toYaml .Values.additionalRuleLabels | nindent 8 }}
{{- end }}
severity: critical
- alert: etcdGRPCRequestsSlow - alert: etcdGRPCRequestsSlow
annotations: annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'