feat: metrics version bump
This commit is contained in:
parent
99996b606f
commit
3343d51d09
@ -5,22 +5,22 @@ folder: Istio
|
||||
condition: '.Values.istiod.telemetry.enabled'
|
||||
dashboards:
|
||||
- name: istio-control-plane
|
||||
url: https://grafana.com/api/dashboards/7645/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/7645/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
- name: istio-mesh
|
||||
url: https://grafana.com/api/dashboards/7639/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/7639/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
- name: istio-service
|
||||
url: https://grafana.com/api/dashboards/7636/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/7636/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
- name: istio-workload
|
||||
url: https://grafana.com/api/dashboards/7630/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/7630/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
- name: istio-performance
|
||||
url: https://grafana.com/api/dashboards/11829/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/11829/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
|
File diff suppressed because one or more lines are too long
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubezero-metrics
|
||||
description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
|
||||
type: application
|
||||
version: 0.10.4
|
||||
version: 0.11.0
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
@ -29,4 +29,4 @@ dependencies:
|
||||
version: 3.0.0
|
||||
repository: https://prometheus-community.github.io/helm-charts
|
||||
condition: prometheus-pushgateway.enabled
|
||||
kubeVersion: ">= 1.26.0"
|
||||
kubeVersion: ">= 1.30.0-0"
|
||||
|
@ -1,6 +1,6 @@
|
||||
# kubezero-metrics
|
||||
|
||||
![Version: 0.10.2](https://img.shields.io/badge/Version-0.10.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
![Version: 0.10.4](https://img.shields.io/badge/Version-0.10.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
|
||||
KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
|
||||
|
||||
@ -14,14 +14,14 @@ KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all
|
||||
|
||||
## Requirements
|
||||
|
||||
Kubernetes: `>= 1.26.0`
|
||||
Kubernetes: `>= 1.30.0-0`
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
|
||||
| https://prometheus-community.github.io/helm-charts | kube-prometheus-stack | 66.1.1 |
|
||||
| https://prometheus-community.github.io/helm-charts | kube-prometheus-stack | 69.2.3 |
|
||||
| https://prometheus-community.github.io/helm-charts | prometheus-adapter | 4.11.0 |
|
||||
| https://prometheus-community.github.io/helm-charts | prometheus-pushgateway | 2.15.0 |
|
||||
| https://prometheus-community.github.io/helm-charts | prometheus-pushgateway | 3.0.0 |
|
||||
|
||||
## Values
|
||||
|
||||
|
@ -30,7 +30,7 @@
|
||||
"options": {
|
||||
"content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only."
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"title": "Notice",
|
||||
"type": "text"
|
||||
},
|
||||
@ -54,7 +54,7 @@
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -100,7 +100,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -134,7 +134,7 @@
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -232,7 +232,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -276,7 +276,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -319,7 +319,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -353,7 +353,7 @@
|
||||
},
|
||||
"id": 8,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -451,7 +451,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -495,7 +495,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -538,7 +538,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -581,7 +581,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -624,7 +624,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -670,7 +670,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -712,7 +712,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -755,7 +755,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -797,7 +797,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -46,7 +46,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -92,7 +92,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -163,7 +163,7 @@
|
||||
"y": 9
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -328,7 +328,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -374,7 +374,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -420,7 +420,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -466,7 +466,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -512,7 +512,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -558,7 +558,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -604,7 +604,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -650,7 +650,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -696,7 +696,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -742,7 +742,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -86,7 +86,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -137,7 +137,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -188,7 +188,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -239,7 +239,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -314,15 +314,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Post Request Latency 99th Quantile",
|
||||
@ -365,15 +365,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Get Request Latency 99th Quantile",
|
||||
@ -416,7 +416,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -467,7 +467,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -518,7 +518,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -70,7 +70,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -105,7 +105,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -140,7 +140,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -175,7 +175,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -210,7 +210,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -260,7 +260,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -319,7 +319,7 @@
|
||||
"y": 12
|
||||
},
|
||||
"id": 8,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -476,7 +476,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -571,7 +571,7 @@
|
||||
"y": 24
|
||||
},
|
||||
"id": 10,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -748,7 +748,7 @@
|
||||
"y": 30
|
||||
},
|
||||
"id": 11,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -892,7 +892,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -943,7 +943,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -994,7 +994,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1045,7 +1045,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1096,7 +1096,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1147,7 +1147,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1198,7 +1198,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1249,7 +1249,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1300,7 +1300,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1351,7 +1351,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1422,7 +1422,7 @@
|
||||
"y": 96
|
||||
},
|
||||
"id": 22,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -70,7 +70,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -105,7 +105,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -140,7 +140,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -242,7 +242,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -317,7 +317,7 @@
|
||||
"y": 14
|
||||
},
|
||||
"id": 6,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -500,7 +500,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -578,7 +578,7 @@
|
||||
"y": 28
|
||||
},
|
||||
"id": 8,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -768,7 +768,7 @@
|
||||
"y": 35
|
||||
},
|
||||
"id": 9,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -912,7 +912,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -963,7 +963,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1014,7 +1014,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1065,7 +1065,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1116,7 +1116,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1167,7 +1167,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1218,7 +1218,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1269,7 +1269,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1340,7 +1340,7 @@
|
||||
"y": 70
|
||||
},
|
||||
"id": 18,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -94,7 +94,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -161,7 +161,7 @@
|
||||
"y": 6
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -323,7 +323,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -342,6 +342,109 @@
|
||||
"legendFormat": "{{pod}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage (w/cache)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"mode": "normal"
|
||||
}
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "max capacity"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.stacking",
|
||||
"value": {
|
||||
"mode": "none"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": false,
|
||||
"tooltip": true,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.lineStyle",
|
||||
"value": {
|
||||
"dash": [
|
||||
10,
|
||||
10
|
||||
],
|
||||
"fill": "dash"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})",
|
||||
"legendFormat": "max capacity"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
|
||||
"legendFormat": "{{pod}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage (w/o cache)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
@ -390,10 +493,10 @@
|
||||
"h": 6,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
"y": 24
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"id": 5,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -102,7 +102,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -208,7 +208,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -250,7 +250,7 @@
|
||||
"y": 14
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -433,7 +433,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -494,7 +494,7 @@
|
||||
"y": 28
|
||||
},
|
||||
"id": 5,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -664,7 +664,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -715,7 +715,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -766,7 +766,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -817,7 +817,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -868,7 +868,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -919,7 +919,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -970,7 +970,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1029,7 +1029,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1088,7 +1088,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1139,7 +1139,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1193,7 +1193,7 @@
|
||||
"y": 70
|
||||
},
|
||||
"id": 16,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -0,0 +1,675 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "1 - avg(rate(windows_cpu_time_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", mode=\"idle\"}[$__rate_interval]))",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Utilisation",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 4,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Requests Commitment",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Limits Commitment",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Utilisation",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Requests Commitment",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 0
|
||||
},
|
||||
"id": 6,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Limits Commitment",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 7,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Namespace"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 8,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "namespace",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"namespace": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "CPU Usage",
|
||||
"Value #B": "CPU Requests",
|
||||
"Value #C": "CPU Requests %",
|
||||
"Value #D": "CPU Limits",
|
||||
"Value #E": "CPU Limits %",
|
||||
"namespace": "Namespace"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "decbytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 9,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage (Private Working Set)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Memory Usage"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "decbytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Memory Requests"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "decbytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Memory Limits"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "decbytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Namespace"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 10,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Requests by Namespace",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "namespace",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"namespace": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "Memory Usage",
|
||||
"Value #B": "Memory Requests",
|
||||
"Value #C": "Memory Requests %",
|
||||
"Value #D": "Memory Limits",
|
||||
"Value #E": "Memory Limits %",
|
||||
"namespace": "Namespace"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Compute Resources / Cluster(Windows)",
|
||||
"uid": "4d08557fd9391b100730f2494bccac68"
|
||||
}
|
@ -0,0 +1,442 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Pod"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "pod",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"pod": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "CPU Usage",
|
||||
"Value #B": "CPU Requests",
|
||||
"Value #C": "CPU Requests %",
|
||||
"Value #D": "CPU Limits",
|
||||
"Value #E": "CPU Limits %",
|
||||
"pod": "Pod"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "decbytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage (Private Working Set)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Pod"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "pod",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"pod": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "Memory Usage",
|
||||
"Value #B": "Memory Requests",
|
||||
"Value #C": "Memory Requests %",
|
||||
"Value #D": "Memory Limits",
|
||||
"Value #E": "Memory Limits %",
|
||||
"pod": "Pod"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "namespace",
|
||||
"name": "namespace",
|
||||
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Compute Resources / Namespace(Windows)",
|
||||
"uid": "490b402361724ab1d4c45666c1fa9b6f"
|
||||
}
|
@ -0,0 +1,497 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Namespace"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "container",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"container": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "CPU Usage",
|
||||
"Value #B": "CPU Requests",
|
||||
"Value #C": "CPU Requests %",
|
||||
"Value #D": "CPU Limits",
|
||||
"Value #E": "CPU Limits %",
|
||||
"container": "Container"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "decbytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "container",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"container": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "Memory Usage",
|
||||
"Value #B": "Memory Requests",
|
||||
"Value #C": "Memory Requests %",
|
||||
"Value #D": "Memory Limits",
|
||||
"Value #E": "Memory Limits %",
|
||||
"container": "Container"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "bytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
|
||||
"legendFormat": "Received : {{ container }}"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
|
||||
"legendFormat": "Transmitted : {{ container }}"
|
||||
}
|
||||
],
|
||||
"title": "Network I/O",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "namespace",
|
||||
"name": "namespace",
|
||||
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "pod",
|
||||
"name": "pod",
|
||||
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\",namespace=\"$namespace\"}, pod)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Compute Resources / Pod(Windows)",
|
||||
"uid": "40597a704a610e936dc6ed374a7ce023"
|
||||
}
|
@ -50,7 +50,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -109,7 +109,7 @@
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -240,7 +240,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -302,7 +302,7 @@
|
||||
"y": 21
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -453,7 +453,7 @@
|
||||
"y": 28
|
||||
},
|
||||
"id": 5,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -597,7 +597,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -648,7 +648,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -699,7 +699,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -750,7 +750,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -801,7 +801,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -852,7 +852,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -903,7 +903,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -954,7 +954,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -102,7 +102,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -189,7 +189,7 @@
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -397,7 +397,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -487,7 +487,7 @@
|
||||
"y": 21
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -663,7 +663,7 @@
|
||||
"y": 28
|
||||
},
|
||||
"id": 5,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -807,7 +807,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -858,7 +858,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -909,7 +909,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -960,7 +960,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1011,7 +1011,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1062,7 +1062,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1113,7 +1113,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1164,7 +1164,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -0,0 +1,404 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\"} * node:windows_node_num_cpu:sum{cluster=\"$cluster\"} / scalar(sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"}))",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "CPU Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_memory_utilisation:ratio{cluster=\"$cluster\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 7
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Saturation (Swap I/O Pages)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(node:windows_node:sum{cluster=\"$cluster\"})",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Disk IO Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Net Utilisation (Transmitted)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 21
|
||||
},
|
||||
"id": 6,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Net Utilisation (Dropped)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 7,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum by (instance)(node:windows_node_filesystem_usage:{cluster=\"$cluster\"})",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Disk Capacity",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "USE Method / Cluster(Windows)",
|
||||
"uid": "53a43377ec9aaf2ff64dfc7a1f539334"
|
||||
}
|
@ -0,0 +1,615 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Utilisation"
|
||||
}
|
||||
],
|
||||
"title": "CPU Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum by (core) (irate(windows_cpu_time_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", mode!=\"idle\", instance=\"$instance\"}[$__rate_interval]))",
|
||||
"legendFormat": "{{core}}"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage Per Core",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_memory_utilisation:{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Memory"
|
||||
}
|
||||
],
|
||||
"title": "Memory Utilisation %",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "bytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 7
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(\n windows_os_visible_memory_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}\n - windows_memory_available_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}\n)\n",
|
||||
"legendFormat": "memory used"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(node:windows_node_memory_totalCached_bytes:sum{cluster=\"$cluster\", instance=\"$instance\"})",
|
||||
"legendFormat": "memory cached"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(windows_memory_available_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"})",
|
||||
"legendFormat": "memory free"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 7
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Swap IO"
|
||||
}
|
||||
],
|
||||
"title": "Memory Saturation (Swap I/O) Pages",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 6,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Utilisation"
|
||||
}
|
||||
],
|
||||
"title": "Disk IO Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/io time/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 14
|
||||
},
|
||||
"id": 7,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(rate(windows_logical_disk_read_bytes_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
|
||||
"legendFormat": "read"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(rate(windows_logical_disk_write_bytes_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
|
||||
"legendFormat": "written"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(rate(windows_logical_disk_read_seconds_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]) + rate(windows_logical_disk_write_seconds_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
|
||||
"legendFormat": "io time"
|
||||
}
|
||||
],
|
||||
"title": "Disk IO",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 8,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "{{volume}}"
|
||||
}
|
||||
],
|
||||
"title": "Disk Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 9,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Utilisation"
|
||||
}
|
||||
],
|
||||
"title": "Net Utilisation (Transmitted)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 28
|
||||
},
|
||||
"id": 10,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Saturation"
|
||||
}
|
||||
],
|
||||
"title": "Net Saturation (Dropped)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "instance",
|
||||
"name": "instance",
|
||||
"query": "label_values(windows_system_system_up_time{cluster=\"$cluster\"}, instance)",
|
||||
"refresh": 2,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "USE Method / Node(Windows)",
|
||||
"uid": "96e7484b0bb53b74fbc2bcb7723cd40b"
|
||||
}
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -70,7 +70,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -105,7 +105,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -140,7 +140,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -175,7 +175,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -210,7 +210,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -261,7 +261,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -312,7 +312,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -363,7 +363,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -414,7 +414,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -473,7 +473,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -532,7 +532,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -583,7 +583,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -634,7 +634,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -685,7 +685,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -736,7 +736,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -787,7 +787,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -838,7 +838,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -889,7 +889,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -940,7 +940,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1015,15 +1015,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
|
||||
"legendFormat": "{{instance}} {{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, le))",
|
||||
"legendFormat": "{{instance}} {{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Request duration 99th quantile",
|
||||
@ -1066,7 +1066,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1117,7 +1117,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1168,7 +1168,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -53,7 +53,7 @@
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -106,7 +106,7 @@
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -177,7 +177,7 @@
|
||||
"y": 9
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -316,7 +316,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -362,7 +362,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -408,7 +408,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -454,7 +454,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -500,7 +500,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -546,7 +546,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -39,7 +39,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -78,7 +78,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -149,7 +149,7 @@
|
||||
"y": 9
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -335,7 +335,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -386,7 +386,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -437,7 +437,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -488,7 +488,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -539,7 +539,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -590,7 +590,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -641,7 +641,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -692,7 +692,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -50,7 +50,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -112,7 +112,7 @@
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -162,7 +162,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -224,7 +224,7 @@
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -53,7 +53,7 @@
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -106,7 +106,7 @@
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -152,7 +152,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -198,7 +198,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -244,7 +244,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -290,7 +290,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -336,7 +336,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -382,7 +382,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -86,7 +86,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -137,7 +137,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -188,7 +188,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -239,7 +239,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -290,7 +290,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -365,15 +365,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Post Request Latency 99th Quantile",
|
||||
@ -416,15 +416,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Get Request Latency 99th Quantile",
|
||||
@ -467,7 +467,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -518,7 +518,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -569,7 +569,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -86,7 +86,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -161,7 +161,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -236,7 +236,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -311,15 +311,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Post Request Latency 99th Quantile",
|
||||
@ -362,15 +362,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Get Request Latency 99th Quantile",
|
||||
@ -413,7 +413,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -464,7 +464,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -515,7 +515,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -39,7 +39,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -78,7 +78,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -117,7 +117,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -156,7 +156,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -207,7 +207,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -258,7 +258,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -309,7 +309,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -360,7 +360,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -411,7 +411,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -462,7 +462,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -18,8 +18,8 @@
|
||||
"subdir": "contrib/mixin"
|
||||
}
|
||||
},
|
||||
"version": "2b323071a8bd4f02ddaf63e0dfa1fd98c221dccb",
|
||||
"sum": "IXI3LQIT9NmTPJAk8WLUJd5+qZfcGpeNCyWIK7oEpws="
|
||||
"version": "f30cbaac111aa01a310fe75360c759cdd4d9cd14",
|
||||
"sum": "XmXkOCriQIZmXwlIIFhqlJMa0e6qGWdxZD+ZDYaN0Po="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -41,16 +41,6 @@
|
||||
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
|
||||
"sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib.git",
|
||||
"subdir": "grafonnet-7.0"
|
||||
}
|
||||
},
|
||||
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
|
||||
"sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
@ -58,8 +48,8 @@
|
||||
"subdir": "gen/grafonnet-latest"
|
||||
}
|
||||
},
|
||||
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
|
||||
"sum": "64fMUPI3frXGj4X1FqFd1t7r04w3CUSmXaDcJ23EYbQ="
|
||||
"version": "d20e609202733790caf5b554c9945d049f243ae3",
|
||||
"sum": "V9vAj21qJOc2DlMPDgB1eEjSQU4A+sAA4AXuJ6bd4xc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -68,18 +58,18 @@
|
||||
"subdir": "gen/grafonnet-v10.0.0"
|
||||
}
|
||||
},
|
||||
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
|
||||
"version": "d20e609202733790caf5b554c9945d049f243ae3",
|
||||
"sum": "xdcrJPJlpkq4+5LpGwN4tPAuheNNLXZjE6tDcyvFjr0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet.git",
|
||||
"subdir": "gen/grafonnet-v11.1.0"
|
||||
"subdir": "gen/grafonnet-v11.4.0"
|
||||
}
|
||||
},
|
||||
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
|
||||
"sum": "41w7p/rwrNsITqNHMXtGSJAfAyKmnflg6rFhKBduUxM="
|
||||
"version": "d20e609202733790caf5b554c9945d049f243ae3",
|
||||
"sum": "aVAX09paQYNOoCSKVpuk1exVIyBoMt/C50QJI+Q/3nA="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -88,7 +78,7 @@
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "767befa8fb46a07be516dec2777d7d89909a529d",
|
||||
"version": "d6c38bb26f576b128cadca4137d73a037afdd872",
|
||||
"sum": "yxqWcq/N3E/a/XreeU6EuE6X7kYPnG0AspAQFKOjASo="
|
||||
},
|
||||
{
|
||||
@ -108,8 +98,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "63d430b69a95741061c2f7fc9d84b1a778511d9c",
|
||||
"sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE="
|
||||
"version": "1199b50e9d2ff53d4bb5fb2304ad1fb69d38e609",
|
||||
"sum": "LfbgcJbilu4uBdKYZSvmkoOTPwEAzg10L3/VqKAIWtA="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -118,8 +108,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "a3fbf21977deb89b7d843eb8371170c011ea6835",
|
||||
"sum": "57zW2IGJ9zbYd8BI0qe6JkoWTRSMNiBUWC6+YcnEsWo="
|
||||
"version": "e27267571be06c2bdc3d2fd8dbd70161cd709cb4",
|
||||
"sum": "je1RPCp2aFNefYs5Q57Q5wDm93p8pL4pdBtA5rC7jLA="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -128,8 +118,8 @@
|
||||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "32e7727ff4613b0f55dfc18aff15afb8c04d03c5",
|
||||
"sum": "lO7jUSzAIy8Yk9pOWJIWgPRhubkWzVh56W6wtYfbVH4="
|
||||
"version": "2a95d4649b2fea55799032fb9c0b571c4ba7f776",
|
||||
"sum": "3bioG7CfTfY9zeu5xU4yon6Zt3kYvNkyl492nOhQxnM="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -138,7 +128,7 @@
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "32e7727ff4613b0f55dfc18aff15afb8c04d03c5",
|
||||
"version": "2a95d4649b2fea55799032fb9c0b571c4ba7f776",
|
||||
"sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c="
|
||||
},
|
||||
{
|
||||
@ -148,8 +138,8 @@
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "7e5a571a3fb735c78e17c76a637eb7e8bb5dd086",
|
||||
"sum": "uTw/Mj+X91S+oqUpAX81xcfWPDlox0tdSZY/YBw7nGE="
|
||||
"version": "1eea946a1532f1e8cccfceea98d907bf3a10b1d9",
|
||||
"sum": "17LhiwefVfoNDsF3DcFZw/UL4PMU7YpNNUaOdaYd1gE="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -158,7 +148,7 @@
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "a366602bacb2c8d773a9cee058b6971b8d2e3732",
|
||||
"version": "465bcbaf2a727c942e7f923aacfb9dff9af8d4a1",
|
||||
"sum": "gi+knjdxs2T715iIQIntrimbHRgHnpM8IFBJDD1gYfs=",
|
||||
"name": "prometheus-operator-mixin"
|
||||
},
|
||||
@ -169,8 +159,8 @@
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "a366602bacb2c8d773a9cee058b6971b8d2e3732",
|
||||
"sum": "z0/lCiMusMHTqntsosMVGYkVcSZjCpyZBmUMVUsK5nA="
|
||||
"version": "465bcbaf2a727c942e7f923aacfb9dff9af8d4a1",
|
||||
"sum": "LctDdofQostvviE5y8vpRKWGGO1ZKO3dgJe7P9xifW0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -179,7 +169,7 @@
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "0f65e8fa5fc72d2678655105c0213b416ca6f34c",
|
||||
"version": "b5d1a64ad5bb0ff879705714d1e40cea82efbd5c",
|
||||
"sum": "Mf4h1BYLle2nrgjf/HXrBbl0Zk8N+xaoEM017o0BC+k=",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
@ -190,8 +180,8 @@
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "cf8c6891cc610e54f70383addd4bb6079f0add35",
|
||||
"sum": "cQCW+1N0Xae5yXecCWDK2oAlN0luBS/5GrwBYSlaFms="
|
||||
"version": "11365f97bef6cb0e6259d536a7e21c49e3f5c065",
|
||||
"sum": "xYj6VYFT/eafsbleNlC+Z2VfLy1CndyYrJs9BcTmnX8="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -200,8 +190,8 @@
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "b407c2930da4f50c0d17fc39404c6302a9eb740b",
|
||||
"sum": "OYT5u3S8DbamuJV/v3gbWSteOvFzMeNwMj+u4Apk7jM=",
|
||||
"version": "509b978f0d675b4c9b3ccf8c0fc06961b0f03e8f",
|
||||
"sum": "2c+wttfee9TwuQJZIkNV7Tekem74Qgc7iZ842P28rNw=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
@ -222,7 +212,7 @@
|
||||
"subdir": "mixin"
|
||||
}
|
||||
},
|
||||
"version": "7037331e6ea7dbe85a1b7af37bf8ea277a80663d",
|
||||
"version": "346d18bb0f8011c63d7106de494cf3b9253161a1",
|
||||
"sum": "ieCD4eMgGbOlrI8GmckGPHBGQDcLasE1rULYq56W/bs=",
|
||||
"name": "thanos-mixin"
|
||||
}
|
||||
|
@ -6,5 +6,5 @@ dashboards:
|
||||
url: https://grafana.com/api/dashboards/9578/revisions/4/download
|
||||
tags: []
|
||||
- name: Prometheus
|
||||
url: https://grafana.com/api/dashboards/3662/revisions/2/download
|
||||
url: https://grafana.com/api/dashboards/19105/revisions/7/download
|
||||
tags: []
|
||||
|
@ -7,7 +7,7 @@
|
||||
"app.kubernetes.io/instance": "main",
|
||||
"app.kubernetes.io/name": "alertmanager",
|
||||
"app.kubernetes.io/part-of": "kube-prometheus",
|
||||
"app.kubernetes.io/version": "0.27.0",
|
||||
"app.kubernetes.io/version": "0.28.0",
|
||||
"prometheus": "k8s",
|
||||
"role": "alert-rules"
|
||||
},
|
||||
|
@ -20,9 +20,9 @@
|
||||
"summary": "etcd cluster members are down."
|
||||
},
|
||||
"expr": "max without (endpoint) (\n sum without (instance, pod) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count without (To) (\n sum without (instance, pod) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s])) > 0.01\n )\n)\n> 0\n",
|
||||
"for": "10m",
|
||||
"for": "20m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -6,7 +6,7 @@
|
||||
"app.kubernetes.io/component": "exporter",
|
||||
"app.kubernetes.io/name": "kube-state-metrics",
|
||||
"app.kubernetes.io/part-of": "kube-prometheus",
|
||||
"app.kubernetes.io/version": "2.14.0",
|
||||
"app.kubernetes.io/version": "2.15.0",
|
||||
"prometheus": "k8s",
|
||||
"role": "alert-rules"
|
||||
},
|
||||
|
@ -19,7 +19,7 @@
|
||||
{
|
||||
"alert": "KubePodCrashLooping",
|
||||
"annotations": {
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: \"CrashLoopBackOff\").",
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: \"CrashLoopBackOff\") on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping",
|
||||
"summary": "Pod is crash looping."
|
||||
},
|
||||
@ -32,7 +32,7 @@
|
||||
{
|
||||
"alert": "KubePodNotReady",
|
||||
"annotations": {
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.",
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready",
|
||||
"summary": "Pod has been in a non-ready state for more than 15 minutes."
|
||||
},
|
||||
@ -45,7 +45,7 @@
|
||||
{
|
||||
"alert": "KubeDeploymentGenerationMismatch",
|
||||
"annotations": {
|
||||
"description": "Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.",
|
||||
"description": "Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch",
|
||||
"summary": "Deployment generation mismatch due to possible roll-back"
|
||||
},
|
||||
@ -58,7 +58,7 @@
|
||||
{
|
||||
"alert": "KubeDeploymentReplicasMismatch",
|
||||
"annotations": {
|
||||
"description": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"description": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch",
|
||||
"summary": "Deployment has not matched the expected number of replicas."
|
||||
},
|
||||
@ -71,7 +71,7 @@
|
||||
{
|
||||
"alert": "KubeDeploymentRolloutStuck",
|
||||
"annotations": {
|
||||
"description": "Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes.",
|
||||
"description": "Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck",
|
||||
"summary": "Deployment rollout is not progressing."
|
||||
},
|
||||
@ -84,7 +84,7 @@
|
||||
{
|
||||
"alert": "KubeStatefulSetReplicasMismatch",
|
||||
"annotations": {
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch",
|
||||
"summary": "StatefulSet has not matched the expected number of replicas."
|
||||
},
|
||||
@ -97,7 +97,7 @@
|
||||
{
|
||||
"alert": "KubeStatefulSetGenerationMismatch",
|
||||
"annotations": {
|
||||
"description": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.",
|
||||
"description": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch",
|
||||
"summary": "StatefulSet generation mismatch due to possible roll-back"
|
||||
},
|
||||
@ -110,7 +110,7 @@
|
||||
{
|
||||
"alert": "KubeStatefulSetUpdateNotRolledOut",
|
||||
"annotations": {
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.",
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout",
|
||||
"summary": "StatefulSet update has not been rolled out."
|
||||
},
|
||||
@ -123,7 +123,7 @@
|
||||
{
|
||||
"alert": "KubeDaemonSetRolloutStuck",
|
||||
"annotations": {
|
||||
"description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m.",
|
||||
"description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck",
|
||||
"summary": "DaemonSet rollout is stuck."
|
||||
},
|
||||
@ -136,7 +136,7 @@
|
||||
{
|
||||
"alert": "KubeContainerWaiting",
|
||||
"annotations": {
|
||||
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: \"{{ $labels.reason }}\").",
|
||||
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: \"{{ $labels.reason }}\") on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting",
|
||||
"summary": "Pod container waiting longer than 1 hour"
|
||||
},
|
||||
@ -149,7 +149,7 @@
|
||||
{
|
||||
"alert": "KubeDaemonSetNotScheduled",
|
||||
"annotations": {
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.",
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled",
|
||||
"summary": "DaemonSet pods are not scheduled."
|
||||
},
|
||||
@ -162,7 +162,7 @@
|
||||
{
|
||||
"alert": "KubeDaemonSetMisScheduled",
|
||||
"annotations": {
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.",
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled",
|
||||
"summary": "DaemonSet pods are misscheduled."
|
||||
},
|
||||
@ -175,7 +175,7 @@
|
||||
{
|
||||
"alert": "KubeJobNotCompleted",
|
||||
"annotations": {
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete.",
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted",
|
||||
"summary": "Job did not complete in time"
|
||||
},
|
||||
@ -187,7 +187,7 @@
|
||||
{
|
||||
"alert": "KubeJobFailed",
|
||||
"annotations": {
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert.",
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed",
|
||||
"summary": "Job failed to complete."
|
||||
},
|
||||
@ -200,7 +200,7 @@
|
||||
{
|
||||
"alert": "KubeHpaReplicasMismatch",
|
||||
"annotations": {
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes.",
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch",
|
||||
"summary": "HPA has not matched desired number of replicas."
|
||||
},
|
||||
@ -213,7 +213,7 @@
|
||||
{
|
||||
"alert": "KubeHpaMaxedOut",
|
||||
"annotations": {
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes.",
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout",
|
||||
"summary": "HPA is running at max replicas"
|
||||
},
|
||||
@ -257,7 +257,7 @@
|
||||
{
|
||||
"alert": "KubeQuotaAlmostFull",
|
||||
"annotations": {
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull",
|
||||
"summary": "Namespace quota is going to be full."
|
||||
},
|
||||
@ -270,7 +270,7 @@
|
||||
{
|
||||
"alert": "KubeQuotaFullyUsed",
|
||||
"annotations": {
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused",
|
||||
"summary": "Namespace quota is fully used."
|
||||
},
|
||||
@ -283,7 +283,7 @@
|
||||
{
|
||||
"alert": "KubeQuotaExceeded",
|
||||
"annotations": {
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded",
|
||||
"summary": "Namespace quota has exceeded the limits."
|
||||
},
|
||||
@ -296,7 +296,7 @@
|
||||
{
|
||||
"alert": "CPUThrottlingHigh",
|
||||
"annotations": {
|
||||
"description": "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.",
|
||||
"description": "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh",
|
||||
"summary": "Processes experience elevated CPU throttling."
|
||||
},
|
||||
@ -396,7 +396,7 @@
|
||||
{
|
||||
"alert": "KubeVersionMismatch",
|
||||
"annotations": {
|
||||
"description": "There are {{ $value }} different semantic versions of Kubernetes components running.",
|
||||
"description": "There are {{ $value }} different semantic versions of Kubernetes components running on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch",
|
||||
"summary": "Different semantic versions of Kubernetes components running."
|
||||
},
|
||||
@ -409,7 +409,7 @@
|
||||
{
|
||||
"alert": "KubeClientErrors",
|
||||
"annotations": {
|
||||
"description": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'",
|
||||
"description": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors",
|
||||
"summary": "Kubernetes API server client is experiencing errors."
|
||||
},
|
||||
@ -427,7 +427,7 @@
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
@ -442,7 +442,7 @@
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
@ -457,7 +457,7 @@
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
@ -472,7 +472,7 @@
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
@ -518,11 +518,12 @@
|
||||
{
|
||||
"alert": "KubeAggregatedAPIErrors",
|
||||
"annotations": {
|
||||
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
|
||||
"description": "Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors",
|
||||
"summary": "Kubernetes aggregated API has reported errors."
|
||||
},
|
||||
"expr": "sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job=\"apiserver\"}[10m])) > 4\n",
|
||||
"expr": "sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job=\"apiserver\"}[1m])) > 0\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -530,7 +531,7 @@
|
||||
{
|
||||
"alert": "KubeAggregatedAPIDown",
|
||||
"annotations": {
|
||||
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
|
||||
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown",
|
||||
"summary": "Kubernetes aggregated API is down."
|
||||
},
|
||||
@ -556,7 +557,7 @@
|
||||
{
|
||||
"alert": "KubeAPITerminatedRequests",
|
||||
"annotations": {
|
||||
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
|
||||
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests",
|
||||
"summary": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
|
||||
},
|
||||
@ -574,11 +575,11 @@
|
||||
{
|
||||
"alert": "KubeNodeNotReady",
|
||||
"annotations": {
|
||||
"description": "{{ $labels.node }} has been unready for more than 15 minutes.",
|
||||
"description": "{{ $labels.node }} has been unready for more than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready",
|
||||
"summary": "Node is not ready."
|
||||
},
|
||||
"expr": "kube_node_status_condition{job=\"kube-state-metrics\",condition=\"Ready\",status=\"true\"} == 0\n",
|
||||
"expr": "kube_node_status_condition{job=\"kube-state-metrics\",condition=\"Ready\",status=\"true\"} == 0\nand on (cluster, node)\nkube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -587,7 +588,7 @@
|
||||
{
|
||||
"alert": "KubeNodeUnreachable",
|
||||
"annotations": {
|
||||
"description": "{{ $labels.node }} is unreachable and some workloads may be rescheduled.",
|
||||
"description": "{{ $labels.node }} is unreachable and some workloads may be rescheduled on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable",
|
||||
"summary": "Node is unreachable."
|
||||
},
|
||||
@ -600,11 +601,11 @@
|
||||
{
|
||||
"alert": "KubeletTooManyPods",
|
||||
"annotations": {
|
||||
"description": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
|
||||
"description": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods",
|
||||
"summary": "Kubelet is running at capacity."
|
||||
},
|
||||
"expr": "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n",
|
||||
"expr": "(\n max by (cluster, instance) (\n kubelet_running_pods{job=\"kubelet\", metrics_path=\"/metrics\"} > 1\n )\n * on (cluster, instance) group_left(node)\n max by (cluster, instance, node) (\n kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"}\n )\n)\n/ on (cluster, node) group_left()\nmax by (cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"pods\"} != 1\n) > 0.95\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "info"
|
||||
@ -613,11 +614,11 @@
|
||||
{
|
||||
"alert": "KubeNodeReadinessFlapping",
|
||||
"annotations": {
|
||||
"description": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.",
|
||||
"description": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping",
|
||||
"summary": "Node readiness status is flapping."
|
||||
},
|
||||
"expr": "sum(changes(kube_node_status_condition{job=\"kube-state-metrics\",status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2\n",
|
||||
"expr": "sum(changes(kube_node_status_condition{job=\"kube-state-metrics\",status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2\nand on (cluster, node)\nkube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -626,7 +627,7 @@
|
||||
{
|
||||
"alert": "KubeletPlegDurationHigh",
|
||||
"annotations": {
|
||||
"description": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"description": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh",
|
||||
"summary": "Kubelet Pod Lifecycle Event Generator is taking too long to relist."
|
||||
},
|
||||
@ -639,7 +640,7 @@
|
||||
{
|
||||
"alert": "KubeletPodStartUpLatencyHigh",
|
||||
"annotations": {
|
||||
"description": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"description": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh",
|
||||
"summary": "Kubelet Pod startup latency is too high."
|
||||
},
|
||||
@ -652,7 +653,7 @@
|
||||
{
|
||||
"alert": "KubeletClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration",
|
||||
"summary": "Kubelet client certificate is about to expire."
|
||||
},
|
||||
@ -664,7 +665,7 @@
|
||||
{
|
||||
"alert": "KubeletClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration",
|
||||
"summary": "Kubelet client certificate is about to expire."
|
||||
},
|
||||
@ -676,7 +677,7 @@
|
||||
{
|
||||
"alert": "KubeletServerCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration",
|
||||
"summary": "Kubelet server certificate is about to expire."
|
||||
},
|
||||
@ -688,7 +689,7 @@
|
||||
{
|
||||
"alert": "KubeletServerCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration",
|
||||
"summary": "Kubelet server certificate is about to expire."
|
||||
},
|
||||
@ -700,7 +701,7 @@
|
||||
{
|
||||
"alert": "KubeletClientCertificateRenewalErrors",
|
||||
"annotations": {
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes).",
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors",
|
||||
"summary": "Kubelet has failed to renew its client certificate."
|
||||
},
|
||||
@ -713,7 +714,7 @@
|
||||
{
|
||||
"alert": "KubeletServerCertificateRenewalErrors",
|
||||
"annotations": {
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).",
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors",
|
||||
"summary": "Kubelet has failed to renew its server certificate."
|
||||
},
|
||||
@ -809,25 +810,25 @@
|
||||
"record": "cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le=\"+Inf\"} * 24 * 30)\n",
|
||||
"expr": "sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le=\"+Inf\"})\n",
|
||||
"record": "cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
|
||||
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
|
||||
"labels": {
|
||||
"verb": "all"
|
||||
},
|
||||
"record": "apiserver_request:availability30d"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
|
||||
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:availability30d"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
|
||||
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
@ -869,98 +870,98 @@
|
||||
"name": "kube-apiserver-burnrate.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate2h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate30m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate3d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate5m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate6h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate2h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate30m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate3d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate5m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
@ -1237,6 +1238,144 @@
|
||||
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "windows.node.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "count by (cluster) (\n windows_system_system_up_time{job=\"kubernetes-windows-exporter\"}\n)\n",
|
||||
"record": "node:windows_node:sum"
|
||||
},
|
||||
{
|
||||
"expr": "count by (cluster, instance) (sum by (cluster, instance, core) (\n windows_cpu_time_total{job=\"kubernetes-windows-exporter\"}\n))\n",
|
||||
"record": "node:windows_node_num_cpu:sum"
|
||||
},
|
||||
{
|
||||
"expr": "1 - avg by (cluster) (rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m]))\n",
|
||||
"record": ":windows_node_cpu_utilisation:avg1m"
|
||||
},
|
||||
{
|
||||
"expr": "1 - avg by (cluster, instance) (\n rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m])\n)\n",
|
||||
"record": "node:windows_node_cpu_utilisation:avg1m"
|
||||
},
|
||||
{
|
||||
"expr": "1 -\nsum by (cluster) (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n/\nsum by (cluster) (windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": ":windows_node_memory_utilisation:"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster) (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_cache_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": ":windows_node_memory_MemFreeCached_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "(windows_memory_cache_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_modified_page_list_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_core_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_normal_priority_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_reserve_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": "node:windows_node_memory_totalCached_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster) (windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": ":windows_node_memory_MemTotal_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, instance) (\n (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n)\n",
|
||||
"record": "node:windows_node_memory_bytes_available:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, instance) (\n windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"}\n)\n",
|
||||
"record": "node:windows_node_memory_bytes_total:sum"
|
||||
},
|
||||
{
|
||||
"expr": "(node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum)\n/\nscalar(sum(node:windows_node_memory_bytes_total:sum))\n",
|
||||
"record": "node:windows_node_memory_utilisation:ratio"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)\n",
|
||||
"record": "node:windows_node_memory_utilisation:"
|
||||
},
|
||||
{
|
||||
"expr": "irate(windows_memory_swap_page_operations_total{job=\"kubernetes-windows-exporter\"}[5m])\n",
|
||||
"record": "node:windows_node_memory_swap_io_pages:irate"
|
||||
},
|
||||
{
|
||||
"expr": "avg by (cluster) (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m])\n )\n",
|
||||
"record": ":windows_node_disk_utilisation:avg_irate"
|
||||
},
|
||||
{
|
||||
"expr": "avg by (cluster, instance) (\n (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
|
||||
"record": "node:windows_node_disk_utilisation:avg_irate"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster,instance,volume)(\n (windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n- windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"})\n/ windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n)\n",
|
||||
"record": "node:windows_node_filesystem_usage:"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster, instance, volume) (windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"} / windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": "node:windows_node_filesystem_avail:"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster) (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n",
|
||||
"record": ":windows_node_net_utilisation:sum_irate"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, instance) (\n (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
|
||||
"record": "node:windows_node_net_utilisation:sum_irate"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster) (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m])) +\nsum by (cluster) (irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n",
|
||||
"record": ":windows_node_net_saturation:sum_irate"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, instance) (\n (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
|
||||
"record": "node:windows_node_net_saturation:sum_irate"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "windows.pod.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "windows_container_available{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_pod_container_available"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_cpu_usage_seconds_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_total_runtime"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_memory_usage_commit_bytes{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_memory_usage"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_memory_usage_private_working_set_bytes{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_private_working_set_usage"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_network_receive_bytes_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_network_received_bytes_total"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_network_transmit_bytes_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_network_transmitted_bytes_total"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
|
||||
"record": "kube_pod_windows_container_resource_memory_request"
|
||||
},
|
||||
{
|
||||
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
|
||||
"record": "kube_pod_windows_container_resource_memory_limit"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
|
||||
"record": "kube_pod_windows_container_resource_cpu_cores_request"
|
||||
},
|
||||
{
|
||||
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
|
||||
"record": "kube_pod_windows_container_resource_cpu_cores_limit"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, namespace, pod, container) (\n rate(windows_container_total_runtime{}[5m])\n)\n",
|
||||
"record": "namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -151,7 +151,7 @@
|
||||
{
|
||||
"alert": "NodeHighNumberConntrackEntriesUsed",
|
||||
"annotations": {
|
||||
"description": "{{ $value | humanizePercentage }} of conntrack entries are used.",
|
||||
"description": "{{ $labels.instance }} {{ $value | humanizePercentage }} of conntrack entries are used.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused",
|
||||
"summary": "Number of conntrack are getting close to the limit."
|
||||
},
|
||||
@ -256,7 +256,7 @@
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage",
|
||||
"summary": "High CPU usage."
|
||||
},
|
||||
"expr": "sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\"}[2m]))) * 100 > 90\n",
|
||||
"expr": "sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode!~\"idle|iowait\"}[2m]))) * 100 > 90\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "info"
|
||||
|
@ -6,7 +6,7 @@
|
||||
"app.kubernetes.io/component": "controller",
|
||||
"app.kubernetes.io/name": "prometheus-operator",
|
||||
"app.kubernetes.io/part-of": "kube-prometheus",
|
||||
"app.kubernetes.io/version": "0.78.2",
|
||||
"app.kubernetes.io/version": "0.80.0",
|
||||
"prometheus": "k8s",
|
||||
"role": "alert-rules"
|
||||
},
|
||||
|
@ -7,7 +7,7 @@
|
||||
"app.kubernetes.io/instance": "k8s",
|
||||
"app.kubernetes.io/name": "prometheus",
|
||||
"app.kubernetes.io/part-of": "kube-prometheus",
|
||||
"app.kubernetes.io/version": "3.0.1",
|
||||
"app.kubernetes.io/version": "3.1.0",
|
||||
"prometheus": "k8s",
|
||||
"role": "alert-rules"
|
||||
},
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -14,9 +14,9 @@ spec:
|
||||
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).'
|
||||
summary: etcd cluster members are down.
|
||||
expr: "max without (endpoint) (\n sum without (instance, pod) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count without (To) (\n sum without (instance, pod) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s])) > 0.01\n )\n)\n> 0\n"
|
||||
for: 10m
|
||||
for: 20m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: etcdInsufficientMembers
|
||||
annotations:
|
||||
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
|
||||
|
@ -11,7 +11,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").'
|
||||
description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff") on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
|
||||
summary: Pod is crash looping.
|
||||
expr: 'max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics"}[5m]) >= 1
|
||||
@ -22,7 +22,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
|
||||
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
|
||||
summary: Pod has been in a non-ready state for more than 15 minutes.
|
||||
expr: "sum by (namespace, pod, cluster) (\n max by(namespace, pod, cluster) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown|Failed\"}\n ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (\n 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n"
|
||||
@ -31,7 +31,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentGenerationMismatch
|
||||
annotations:
|
||||
description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
|
||||
description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
|
||||
summary: Deployment generation mismatch due to possible roll-back
|
||||
expr: "kube_deployment_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_deployment_metadata_generation{job=\"kube-state-metrics\"}\n"
|
||||
@ -40,7 +40,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
annotations:
|
||||
description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
|
||||
description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
|
||||
summary: Deployment has not matched the expected number of replicas.
|
||||
expr: "(\n kube_deployment_spec_replicas{job=\"kube-state-metrics\"}\n >\n kube_deployment_status_replicas_available{job=\"kube-state-metrics\"}\n) and (\n changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\"}[10m])\n ==\n 0\n)\n"
|
||||
@ -49,7 +49,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentRolloutStuck
|
||||
annotations:
|
||||
description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes.
|
||||
description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
|
||||
summary: Deployment rollout is not progressing.
|
||||
expr: 'kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics"}
|
||||
@ -62,7 +62,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetReplicasMismatch
|
||||
annotations:
|
||||
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
|
||||
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
|
||||
summary: StatefulSet has not matched the expected number of replicas.
|
||||
expr: "(\n kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas{job=\"kube-state-metrics\"}\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[10m])\n ==\n 0\n)\n"
|
||||
@ -71,7 +71,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetGenerationMismatch
|
||||
annotations:
|
||||
description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
|
||||
description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
|
||||
summary: StatefulSet generation mismatch due to possible roll-back
|
||||
expr: "kube_statefulset_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_statefulset_metadata_generation{job=\"kube-state-metrics\"}\n"
|
||||
@ -80,7 +80,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||
annotations:
|
||||
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
|
||||
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
|
||||
summary: StatefulSet update has not been rolled out.
|
||||
expr: "(\n max by(namespace, statefulset, job, cluster) (\n kube_statefulset_status_current_revision{job=\"kube-state-metrics\"}\n unless\n kube_statefulset_status_update_revision{job=\"kube-state-metrics\"}\n )\n *\n (\n kube_statefulset_replicas{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
|
||||
@ -89,7 +89,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m.
|
||||
description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
|
||||
summary: DaemonSet rollout is stuck.
|
||||
expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
|
||||
@ -98,7 +98,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. (reason: "{{`{{`}} $labels.reason {{`}}`}}").'
|
||||
description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. (reason: "{{`{{`}} $labels.reason {{`}}`}}") on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
|
||||
summary: Pod container waiting longer than 1 hour
|
||||
expr: 'kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="kube-state-metrics"} > 0
|
||||
@ -109,7 +109,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
|
||||
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
|
||||
summary: DaemonSet pods are not scheduled.
|
||||
expr: "kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n -\nkube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"} > 0\n"
|
||||
@ -118,7 +118,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetMisScheduled
|
||||
annotations:
|
||||
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
|
||||
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
|
||||
summary: DaemonSet pods are misscheduled.
|
||||
expr: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||
@ -129,7 +129,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeJobNotCompleted
|
||||
annotations:
|
||||
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.
|
||||
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
|
||||
summary: Job did not complete in time
|
||||
expr: "time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\"}\n and\nkube_job_status_active{job=\"kube-state-metrics\"} > 0) > 43200\n"
|
||||
@ -137,7 +137,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeJobFailed
|
||||
annotations:
|
||||
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.
|
||||
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
|
||||
summary: Job failed to complete.
|
||||
expr: 'kube_job_failed{job="kube-state-metrics"} > 0
|
||||
@ -148,7 +148,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeHpaReplicasMismatch
|
||||
annotations:
|
||||
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
|
||||
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
|
||||
summary: HPA has not matched desired number of replicas.
|
||||
expr: "(kube_horizontalpodautoscaler_status_desired_replicas{job=\"kube-state-metrics\"}\n !=\nkube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"})\n and\n(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n >\nkube_horizontalpodautoscaler_spec_min_replicas{job=\"kube-state-metrics\"})\n and\n(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n <\nkube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"})\n and\nchanges(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}[15m]) == 0\n"
|
||||
@ -157,7 +157,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeHpaMaxedOut
|
||||
annotations:
|
||||
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.
|
||||
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
|
||||
summary: HPA is running at max replicas
|
||||
expr: "kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n ==\nkube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"}\n"
|
||||
@ -186,7 +186,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeQuotaAlmostFull
|
||||
annotations:
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
|
||||
summary: Namespace quota is going to be full.
|
||||
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 0.9 < 1\n"
|
||||
@ -195,7 +195,7 @@ spec:
|
||||
severity: info
|
||||
- alert: KubeQuotaFullyUsed
|
||||
annotations:
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
|
||||
summary: Namespace quota is fully used.
|
||||
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n == 1\n"
|
||||
@ -204,7 +204,7 @@ spec:
|
||||
severity: info
|
||||
- alert: KubeQuotaExceeded
|
||||
annotations:
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
|
||||
summary: Namespace quota has exceeded the limits.
|
||||
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 1\n"
|
||||
@ -213,7 +213,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: CPUThrottlingHigh
|
||||
annotations:
|
||||
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.'
|
||||
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
|
||||
summary: Processes experience elevated CPU throttling.
|
||||
expr: "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)\n /\nsum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)\n > ( 25 / 100 )\n"
|
||||
@ -281,7 +281,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubeVersionMismatch
|
||||
annotations:
|
||||
description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
|
||||
description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch
|
||||
summary: Different semantic versions of Kubernetes components running.
|
||||
expr: 'count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1
|
||||
@ -292,7 +292,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeClientErrors
|
||||
annotations:
|
||||
description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.'
|
||||
description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
|
||||
summary: Kubernetes API server client is experiencing errors.
|
||||
expr: "(sum(rate(rest_client_requests_total{job=\"apiserver\",code=~\"5..\"}[5m])) by (cluster, instance, job, namespace)\n /\nsum(rate(rest_client_requests_total{job=\"apiserver\"}[5m])) by (cluster, instance, job, namespace))\n> 0.01\n"
|
||||
@ -303,7 +303,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: 'sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000)
|
||||
@ -320,7 +320,7 @@ spec:
|
||||
short: 5m
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: 'sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000)
|
||||
@ -337,7 +337,7 @@ spec:
|
||||
short: 30m
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: 'sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000)
|
||||
@ -354,7 +354,7 @@ spec:
|
||||
short: 2h
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: 'sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000)
|
||||
@ -403,17 +403,18 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeAggregatedAPIErrors
|
||||
annotations:
|
||||
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
|
||||
description: Kubernetes aggregated API {{`{{`}} $labels.instance {{`}}`}}/{{`{{`}} $labels.name {{`}}`}} has reported {{`{{`}} $labels.reason {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors
|
||||
summary: Kubernetes aggregated API has reported errors.
|
||||
expr: 'sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4
|
||||
expr: 'sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0
|
||||
|
||||
'
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeAggregatedAPIDown
|
||||
annotations:
|
||||
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
|
||||
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown
|
||||
summary: Kubernetes aggregated API is down.
|
||||
expr: '(1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m]))) * 100 < 85
|
||||
@ -435,7 +436,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeAPITerminatedRequests
|
||||
annotations:
|
||||
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
|
||||
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
|
||||
summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
|
||||
expr: 'sum by(cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum by(cluster) (rate(apiserver_request_total{job="apiserver"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
|
||||
@ -448,18 +449,22 @@ spec:
|
||||
rules:
|
||||
- alert: KubeNodeNotReady
|
||||
annotations:
|
||||
description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes.'
|
||||
description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready
|
||||
summary: Node is not ready.
|
||||
expr: 'kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
|
||||
|
||||
and on (cluster, node)
|
||||
|
||||
kube_node_spec_unschedulable{job="kube-state-metrics"} == 0
|
||||
|
||||
'
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeNodeUnreachable
|
||||
annotations:
|
||||
description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled.'
|
||||
description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable
|
||||
summary: Node is unreachable.
|
||||
expr: '(kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1
|
||||
@ -470,27 +475,31 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletTooManyPods
|
||||
annotations:
|
||||
description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.
|
||||
description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods
|
||||
summary: Kubelet is running at capacity.
|
||||
expr: "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n"
|
||||
expr: "(\n max by (cluster, instance) (\n kubelet_running_pods{job=\"kubelet\", metrics_path=\"/metrics\"} > 1\n )\n * on (cluster, instance) group_left(node)\n max by (cluster, instance, node) (\n kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"}\n )\n)\n/ on (cluster, node) group_left()\nmax by (cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"pods\"} != 1\n) > 0.95\n"
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: KubeNodeReadinessFlapping
|
||||
annotations:
|
||||
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.
|
||||
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping
|
||||
summary: Node readiness status is flapping.
|
||||
expr: 'sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2
|
||||
|
||||
and on (cluster, node)
|
||||
|
||||
kube_node_spec_unschedulable{job="kube-state-metrics"} == 0
|
||||
|
||||
'
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeletPlegDurationHigh
|
||||
annotations:
|
||||
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
|
||||
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh
|
||||
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
|
||||
expr: 'node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
|
||||
@ -501,7 +510,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletPodStartUpLatencyHigh
|
||||
annotations:
|
||||
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
|
||||
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh
|
||||
summary: Kubelet Pod startup latency is too high.
|
||||
expr: 'histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
|
||||
@ -512,7 +521,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletClientCertificateExpiration
|
||||
annotations:
|
||||
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
|
||||
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
|
||||
summary: Kubelet client certificate is about to expire.
|
||||
expr: 'kubelet_certificate_manager_client_ttl_seconds < 604800
|
||||
@ -522,7 +531,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletClientCertificateExpiration
|
||||
annotations:
|
||||
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
|
||||
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
|
||||
summary: Kubelet client certificate is about to expire.
|
||||
expr: 'kubelet_certificate_manager_client_ttl_seconds < 86400
|
||||
@ -532,7 +541,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeletServerCertificateExpiration
|
||||
annotations:
|
||||
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
|
||||
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
|
||||
summary: Kubelet server certificate is about to expire.
|
||||
expr: 'kubelet_certificate_manager_server_ttl_seconds < 604800
|
||||
@ -542,7 +551,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletServerCertificateExpiration
|
||||
annotations:
|
||||
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
|
||||
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
|
||||
summary: Kubelet server certificate is about to expire.
|
||||
expr: 'kubelet_certificate_manager_server_ttl_seconds < 86400
|
||||
@ -552,7 +561,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeletClientCertificateRenewalErrors
|
||||
annotations:
|
||||
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).
|
||||
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors
|
||||
summary: Kubelet has failed to renew its client certificate.
|
||||
expr: 'increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0
|
||||
@ -563,7 +572,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletServerCertificateRenewalErrors
|
||||
annotations:
|
||||
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).
|
||||
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors
|
||||
summary: Kubelet has failed to renew its server certificate.
|
||||
expr: 'increase(kubelet_server_expiration_renew_errors[5m]) > 0
|
||||
@ -640,20 +649,20 @@ spec:
|
||||
|
||||
'
|
||||
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h
|
||||
- expr: 'sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"} * 24 * 30)
|
||||
- expr: 'sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"})
|
||||
|
||||
'
|
||||
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d
|
||||
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"\
|
||||
cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
|
||||
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\
|
||||
LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
|
||||
labels:
|
||||
verb: all
|
||||
record: apiserver_request:availability30d
|
||||
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
|
||||
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:availability30d
|
||||
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
|
||||
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:availability30d
|
||||
@ -687,66 +696,66 @@ spec:
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- name: kube-apiserver-burnrate.rules
|
||||
rules:
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate1d
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate1h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate2h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"\
|
||||
,code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"\
|
||||
apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate30m
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate3d
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate5m
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate6h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate1d
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate1h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate2h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate30m
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate3d
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate5m
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate6h
|
||||
@ -916,4 +925,122 @@ spec:
|
||||
labels:
|
||||
quantile: '0.5'
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- name: windows.node.rules
|
||||
rules:
|
||||
- expr: "count by (cluster) (\n windows_system_system_up_time{job=\"kubernetes-windows-exporter\"}\n)\n"
|
||||
record: node:windows_node:sum
|
||||
- expr: "count by (cluster, instance) (sum by (cluster, instance, core) (\n windows_cpu_time_total{job=\"kubernetes-windows-exporter\"}\n))\n"
|
||||
record: node:windows_node_num_cpu:sum
|
||||
- expr: '1 - avg by (cluster) (rate(windows_cpu_time_total{job="kubernetes-windows-exporter",mode="idle"}[1m]))
|
||||
|
||||
'
|
||||
record: :windows_node_cpu_utilisation:avg1m
|
||||
- expr: "1 - avg by (cluster, instance) (\n rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m])\n)\n"
|
||||
record: node:windows_node_cpu_utilisation:avg1m
|
||||
- expr: '1 -
|
||||
|
||||
sum by (cluster) (windows_memory_available_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
/
|
||||
|
||||
sum by (cluster) (windows_os_visible_memory_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: ':windows_node_memory_utilisation:'
|
||||
- expr: 'sum by (cluster) (windows_memory_available_bytes{job="kubernetes-windows-exporter"} + windows_memory_cache_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: :windows_node_memory_MemFreeCached_bytes:sum
|
||||
- expr: '(windows_memory_cache_bytes{job="kubernetes-windows-exporter"} + windows_memory_modified_page_list_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_core_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_normal_priority_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_reserve_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: node:windows_node_memory_totalCached_bytes:sum
|
||||
- expr: 'sum by (cluster) (windows_os_visible_memory_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: :windows_node_memory_MemTotal_bytes:sum
|
||||
- expr: "sum by (cluster, instance) (\n (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n)\n"
|
||||
record: node:windows_node_memory_bytes_available:sum
|
||||
- expr: "sum by (cluster, instance) (\n windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"}\n)\n"
|
||||
record: node:windows_node_memory_bytes_total:sum
|
||||
- expr: '(node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum)
|
||||
|
||||
/
|
||||
|
||||
scalar(sum(node:windows_node_memory_bytes_total:sum))
|
||||
|
||||
'
|
||||
record: node:windows_node_memory_utilisation:ratio
|
||||
- expr: '1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)
|
||||
|
||||
'
|
||||
record: 'node:windows_node_memory_utilisation:'
|
||||
- expr: 'irate(windows_memory_swap_page_operations_total{job="kubernetes-windows-exporter"}[5m])
|
||||
|
||||
'
|
||||
record: node:windows_node_memory_swap_io_pages:irate
|
||||
- expr: "avg by (cluster) (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m])\n )\n"
|
||||
record: :windows_node_disk_utilisation:avg_irate
|
||||
- expr: "avg by (cluster, instance) (\n (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
|
||||
record: node:windows_node_disk_utilisation:avg_irate
|
||||
- expr: "max by (cluster,instance,volume)(\n (windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n- windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"})\n/ windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n)\n"
|
||||
record: 'node:windows_node_filesystem_usage:'
|
||||
- expr: 'max by (cluster, instance, volume) (windows_logical_disk_free_bytes{job="kubernetes-windows-exporter"} / windows_logical_disk_size_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: 'node:windows_node_filesystem_avail:'
|
||||
- expr: 'sum by (cluster) (irate(windows_net_bytes_total{job="kubernetes-windows-exporter"}[1m]))
|
||||
|
||||
'
|
||||
record: :windows_node_net_utilisation:sum_irate
|
||||
- expr: "sum by (cluster, instance) (\n (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
|
||||
record: node:windows_node_net_utilisation:sum_irate
|
||||
- expr: 'sum by (cluster) (irate(windows_net_packets_received_discarded_total{job="kubernetes-windows-exporter"}[1m])) +
|
||||
|
||||
sum by (cluster) (irate(windows_net_packets_outbound_discarded_total{job="kubernetes-windows-exporter"}[1m]))
|
||||
|
||||
'
|
||||
record: :windows_node_net_saturation:sum_irate
|
||||
- expr: "sum by (cluster, instance) (\n (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
|
||||
record: node:windows_node_net_saturation:sum_irate
|
||||
- name: windows.pod.rules
|
||||
rules:
|
||||
- expr: 'windows_container_available{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_pod_container_available
|
||||
- expr: 'windows_container_cpu_usage_seconds_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_total_runtime
|
||||
- expr: 'windows_container_memory_usage_commit_bytes{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_memory_usage
|
||||
- expr: 'windows_container_memory_usage_private_working_set_bytes{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_private_working_set_usage
|
||||
- expr: 'windows_container_network_receive_bytes_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_network_received_bytes_total
|
||||
- expr: 'windows_container_network_transmit_bytes_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_network_transmitted_bytes_total
|
||||
- expr: "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n"
|
||||
record: kube_pod_windows_container_resource_memory_request
|
||||
- expr: 'kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on(container,pod,namespace,cluster) (windows_pod_container_available)
|
||||
|
||||
'
|
||||
record: kube_pod_windows_container_resource_memory_limit
|
||||
- expr: "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n"
|
||||
record: kube_pod_windows_container_resource_cpu_cores_request
|
||||
- expr: 'kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on(container,pod,namespace,cluster) (windows_pod_container_available)
|
||||
|
||||
'
|
||||
record: kube_pod_windows_container_resource_cpu_cores_limit
|
||||
- expr: "sum by (cluster, namespace, pod, container) (\n rate(windows_container_total_runtime{}[5m])\n)\n"
|
||||
record: namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate
|
||||
|
||||
|
@ -105,7 +105,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeHighNumberConntrackEntriesUsed
|
||||
annotations:
|
||||
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.'
|
||||
description: '{{`{{`}} $labels.instance {{`}}`}} {{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
|
||||
summary: Number of conntrack are getting close to the limit.
|
||||
expr: '(node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75
|
||||
@ -193,7 +193,7 @@ spec:
|
||||
'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
|
||||
summary: High CPU usage.
|
||||
expr: 'sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!="idle"}[2m]))) * 100 > 90
|
||||
expr: 'sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!~"idle|iowait"}[2m]))) * 100 > 90
|
||||
|
||||
'
|
||||
for: 15m
|
||||
|
@ -101,7 +101,7 @@ operators:
|
||||
metrics:
|
||||
enabled: false
|
||||
namespace: monitoring
|
||||
targetRevision: 0.10.2
|
||||
targetRevision: 0.11.0
|
||||
istio:
|
||||
grafana: {}
|
||||
prometheus: {}
|
||||
|
Loading…
Reference in New Issue
Block a user