feat: metrics version bump

This commit is contained in:
Stefan Reimer 2021-06-30 18:26:06 +02:00
parent ff722835ab
commit dd867d8e58
117 changed files with 5304 additions and 4791 deletions

View File

@ -16,11 +16,11 @@ dependencies:
version: ">= 0.1.3" version: ">= 0.1.3"
repository: https://zero-down-time.github.io/kubezero/ repository: https://zero-down-time.github.io/kubezero/
- name: kube-prometheus-stack - name: kube-prometheus-stack
version: 16.1.2 version: 16.12.0
# Switch back to upstream once all alerts are fixed eg. etcd gpcr # Switch back to upstream once all alerts are fixed eg. etcd gpcr
# repository: https://prometheus-community.github.io/helm-charts # repository: https://prometheus-community.github.io/helm-charts
- name: prometheus-adapter - name: prometheus-adapter
version: 2.13 version: 2.14.2
repository: https://prometheus-community.github.io/helm-charts repository: https://prometheus-community.github.io/helm-charts
condition: prometheus-adapter.enabled condition: prometheus-adapter.enabled
kubeVersion: ">= 1.18.0" kubeVersion: ">= 1.18.0"

View File

@ -6,12 +6,12 @@ annotations:
url: https://github.com/prometheus-operator/kube-prometheus url: https://github.com/prometheus-operator/kube-prometheus
artifacthub.io/operator: "true" artifacthub.io/operator: "true"
apiVersion: v2 apiVersion: v2
appVersion: 0.48.0 appVersion: 0.48.1
dependencies: dependencies:
- condition: kubeStateMetrics.enabled - condition: kubeStateMetrics.enabled
name: kube-state-metrics name: kube-state-metrics
repository: https://prometheus-community.github.io/helm-charts repository: https://prometheus-community.github.io/helm-charts
version: 3.1.* version: 3.3.*
- condition: nodeExporter.enabled - condition: nodeExporter.enabled
name: prometheus-node-exporter name: prometheus-node-exporter
repository: https://prometheus-community.github.io/helm-charts repository: https://prometheus-community.github.io/helm-charts
@ -19,7 +19,7 @@ dependencies:
- condition: grafana.enabled - condition: grafana.enabled
name: grafana name: grafana
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 6.9.* version: 6.13.*
description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards,
and Prometheus rules combined with documentation and scripts to provide easy to and Prometheus rules combined with documentation and scripts to provide easy to
operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus
@ -47,4 +47,4 @@ sources:
- https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus - https://github.com/prometheus-operator/kube-prometheus
type: application type: application
version: 16.1.2 version: 16.12.0

View File

@ -1,5 +1,5 @@
apiVersion: v2 apiVersion: v2
appVersion: 7.5.5 appVersion: 8.0.3
description: The leading tool for querying and visualizing time series and metrics. description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net home: https://grafana.net
icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png
@ -19,4 +19,4 @@ name: grafana
sources: sources:
- https://github.com/grafana/grafana - https://github.com/grafana/grafana
type: application type: application
version: 6.9.1 version: 6.13.5

View File

@ -59,8 +59,8 @@ This version requires Helm >= 3.1.0.
| `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` | | `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` |
| `priorityClassName` | Name of Priority Class to assign pods | `nil` | | `priorityClassName` | Name of Priority Class to assign pods | `nil` |
| `image.repository` | Image repository | `grafana/grafana` | | `image.repository` | Image repository | `grafana/grafana` |
| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.4.5` | | `image.tag` | Image tag (`Must be >= 5.0.0`) | `8.0.3` |
| `image.sha` | Image sha (optional) | `2b56f6106ddc376bb46d974230d530754bf65a640dfbc5245191d72d3b49efc6` | | `image.sha` | Image sha (optional) | `80c6d6ac633ba5ab3f722976fb1d9a138f87ca6a9934fcd26a5fc28cbde7dbfa` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` | | `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `image.pullSecrets` | Image pull secrets | `{}` | | `image.pullSecrets` | Image pull secrets | `{}` |
| `service.enabled` | Enable grafana service | `true` | | `service.enabled` | Enable grafana service | `true` |
@ -83,7 +83,7 @@ This version requires Helm >= 3.1.0.
| `ingress.path` | Ingress accepted path | `/` | | `ingress.path` | Ingress accepted path | `/` |
| `ingress.pathType` | Ingress type of path | `Prefix` | | `ingress.pathType` | Ingress type of path | `Prefix` |
| `ingress.hosts` | Ingress accepted hostnames | `["chart-example.local"]` | | `ingress.hosts` | Ingress accepted hostnames | `["chart-example.local"]` |
| `ingress.extraPaths` | Ingress extra paths to prepend to every host configuration. Useful when configuring [custom actions with AWS ALB Ingress Controller](https://kubernetes-sigs.github.io/aws-alb-ingress-controller/guide/ingress/annotation/#actions). | `[]` | | `ingress.extraPaths` | Ingress extra paths to prepend to every host configuration. Useful when configuring [custom actions with AWS ALB Ingress Controller](https://kubernetes-sigs.github.io/aws-alb-ingress-controller/guide/ingress/annotation/#actions). Requires `ingress.hosts` to have one or more host entries. | `[]` |
| `ingress.tls` | Ingress TLS configuration | `[]` | | `ingress.tls` | Ingress TLS configuration | `[]` |
| `resources` | CPU/Memory resource requests/limits | `{}` | | `resources` | CPU/Memory resource requests/limits | `{}` |
| `nodeSelector` | Node labels for pod assignment | `{}` | | `nodeSelector` | Node labels for pod assignment | `{}` |
@ -116,6 +116,7 @@ This version requires Helm >= 3.1.0.
| `envValueFrom` | Environment variables from alternate sources. See the API docs on [EnvVarSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#envvarsource-v1-core) for format details. | `{}` | | `envValueFrom` | Environment variables from alternate sources. See the API docs on [EnvVarSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#envvarsource-v1-core) for format details. | `{}` |
| `envFromSecret` | Name of a Kubernetes secret (must be manually created in the same namespace) containing values to be added to the environment. Can be templated | `""` | | `envFromSecret` | Name of a Kubernetes secret (must be manually created in the same namespace) containing values to be added to the environment. Can be templated | `""` |
| `envRenderSecret` | Sensible environment variables passed to pods and stored as secret | `{}` | | `envRenderSecret` | Sensible environment variables passed to pods and stored as secret | `{}` |
| `enableServiceLinks` | Inject Kubernetes services as environment variables. | `true` |
| `extraSecretMounts` | Additional grafana server secret mounts | `[]` | | `extraSecretMounts` | Additional grafana server secret mounts | `[]` |
| `extraVolumeMounts` | Additional grafana server volume mounts | `[]` | | `extraVolumeMounts` | Additional grafana server volume mounts | `[]` |
| `extraConfigmapMounts` | Additional grafana server configMap volume mounts | `[]` | | `extraConfigmapMounts` | Additional grafana server configMap volume mounts | `[]` |
@ -136,7 +137,7 @@ This version requires Helm >= 3.1.0.
| `podLabels` | Pod labels | `{}` | | `podLabels` | Pod labels | `{}` |
| `podPortName` | Name of the grafana port on the pod | `grafana` | | `podPortName` | Name of the grafana port on the pod | `grafana` |
| `sidecar.image.repository` | Sidecar image repository | `quay.io/kiwigrid/k8s-sidecar` | | `sidecar.image.repository` | Sidecar image repository | `quay.io/kiwigrid/k8s-sidecar` |
| `sidecar.image.tag` | Sidecar image tag | `1.10.7` | | `sidecar.image.tag` | Sidecar image tag | `1.12.2` |
| `sidecar.image.sha` | Sidecar image sha (optional) | `""` | | `sidecar.image.sha` | Sidecar image sha (optional) | `""` |
| `sidecar.imagePullPolicy` | Sidecar image pull policy | `IfNotPresent` | | `sidecar.imagePullPolicy` | Sidecar image pull policy | `IfNotPresent` |
| `sidecar.resources` | Sidecar resources | `{}` | | `sidecar.resources` | Sidecar resources | `{}` |

View File

@ -104,13 +104,67 @@ app.kubernetes.io/name: {{ include "grafana.name" . }}-image-renderer
app.kubernetes.io/instance: {{ .Release.Name }} app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}} {{- end -}}
{{/*
Looks if there's an existing secret and reuse its password. If not it generates
new password and use it.
*/}}
{{- define "grafana.password" -}}
{{- $secret := (lookup "v1" "Secret" (include "grafana.namespace" .) (include "grafana.fullname" .) ) -}}
{{- if $secret -}}
{{- index $secret "data" "admin-password" -}}
{{- else -}}
{{- (randAlphaNum 40) | b64enc | quote -}}
{{- end -}}
{{- end -}}
{{/*
Get KubeVersion removing pre-release information.
*/}}
{{- define "grafana.kubeVersion" -}}
{{- default .Capabilities.KubeVersion.Version (regexFind "v[0-9]+\\.[0-9]+\\.[0-9]+" .Capabilities.KubeVersion.Version) -}}
{{- end -}}
{{/* {{/*
Return the appropriate apiVersion for rbac. Return the appropriate apiVersion for rbac.
*/}} */}}
{{- define "rbac.apiVersion" -}} {{- define "grafana.rbac.apiVersion" -}}
{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" }} {{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" }}
{{- print "rbac.authorization.k8s.io/v1" -}} {{- print "rbac.authorization.k8s.io/v1" -}}
{{- else -}} {{- else -}}
{{- print "rbac.authorization.k8s.io/v1beta1" -}} {{- print "rbac.authorization.k8s.io/v1beta1" -}}
{{- end -}}
{{- end -}} {{- end -}}
{{/*
Return the appropriate apiVersion for ingress.
*/}}
{{- define "grafana.ingress.apiVersion" -}}
{{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19.x" (include "grafana.kubeVersion" .)) -}}
{{- print "networking.k8s.io/v1" -}}
{{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}}
{{- print "networking.k8s.io/v1beta1" -}}
{{- else -}}
{{- print "extensions/v1beta1" -}}
{{- end -}}
{{- end -}}
{{/*
Return if ingress is stable.
*/}}
{{- define "grafana.ingress.isStable" -}}
{{- eq (include "grafana.ingress.apiVersion" .) "networking.k8s.io/v1" -}}
{{- end -}}
{{/*
Return if ingress supports ingressClassName.
*/}}
{{- define "grafana.ingress.supportsIngressClassName" -}}
{{- or (eq (include "grafana.ingress.isStable" .) "true") (and (eq (include "grafana.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18.x" (include "grafana.kubeVersion" .))) -}}
{{- end -}}
{{/*
Return if ingress supports pathType.
*/}}
{{- define "grafana.ingress.supportsPathType" -}}
{{- or (eq (include "grafana.ingress.isStable" .) "true") (and (eq (include "grafana.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18.x" (include "grafana.kubeVersion" .))) -}}
{{- end -}} {{- end -}}

View File

@ -159,6 +159,7 @@ imagePullSecrets:
- name: {{ . }} - name: {{ . }}
{{- end}} {{- end}}
{{- end }} {{- end }}
enableServiceLinks: {{ .Values.enableServiceLinks }}
containers: containers:
{{- if .Values.sidecar.dashboards.enabled }} {{- if .Values.sidecar.dashboards.enabled }}
- name: {{ template "grafana.name" . }}-sc-dashboard - name: {{ template "grafana.name" . }}-sc-dashboard
@ -357,6 +358,14 @@ containers:
- name: GF_RENDERING_CALLBACK_URL - name: GF_RENDERING_CALLBACK_URL
value: http://{{ template "grafana.fullname" . }}.{{ template "grafana.namespace" . }}:{{ .Values.service.port }}/{{ .Values.imageRenderer.grafanaSubPath }} value: http://{{ template "grafana.fullname" . }}.{{ template "grafana.namespace" . }}:{{ .Values.service.port }}/{{ .Values.imageRenderer.grafanaSubPath }}
{{ end }} {{ end }}
- name: GF_PATHS_DATA
value: {{ (get .Values "grafana.ini").paths.data }}
- name: GF_PATHS_LOGS
value: {{ (get .Values "grafana.ini").paths.logs }}
- name: GF_PATHS_PLUGINS
value: {{ (get .Values "grafana.ini").paths.plugins }}
- name: GF_PATHS_PROVISIONING
value: {{ (get .Values "grafana.ini").paths.provisioning }}
{{- range $key, $value := .Values.envValueFrom }} {{- range $key, $value := .Values.envValueFrom }}
- name: {{ $key | quote }} - name: {{ $key | quote }}
valueFrom: valueFrom:

View File

@ -1,17 +1,13 @@
{{- if .Values.ingress.enabled -}} {{- if .Values.ingress.enabled -}}
{{- $ingressApiIsStable := eq (include "grafana.ingress.isStable" .) "true" -}}
{{- $ingressSupportsIngressClassName := eq (include "grafana.ingress.supportsIngressClassName" .) "true" -}}
{{- $ingressSupportsPathType := eq (include "grafana.ingress.supportsPathType" .) "true" -}}
{{- $fullName := include "grafana.fullname" . -}} {{- $fullName := include "grafana.fullname" . -}}
{{- $servicePort := .Values.service.port -}} {{- $servicePort := .Values.service.port -}}
{{- $ingressPath := .Values.ingress.path -}} {{- $ingressPath := .Values.ingress.path -}}
{{- $ingressPathType := .Values.ingress.pathType -}} {{- $ingressPathType := .Values.ingress.pathType -}}
{{- $extraPaths := .Values.ingress.extraPaths -}} {{- $extraPaths := .Values.ingress.extraPaths -}}
{{- $newAPI := .Capabilities.APIVersions.Has "networking.k8s.io/v1/Ingress" -}} apiVersion: {{ include "grafana.ingress.apiVersion" . }}
{{- if $newAPI -}}
apiVersion: networking.k8s.io/v1
{{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1/Ingress" }}
apiVersion: networking.k8s.io/v1beta1
{{- else }}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress kind: Ingress
metadata: metadata:
name: {{ $fullName }} name: {{ $fullName }}
@ -28,7 +24,7 @@ metadata:
{{- end }} {{- end }}
{{- end }} {{- end }}
spec: spec:
{{- if .Values.ingress.ingressClassName }} {{- if and $ingressSupportsIngressClassName .Values.ingress.ingressClassName }}
ingressClassName: {{ .Values.ingress.ingressClassName }} ingressClassName: {{ .Values.ingress.ingressClassName }}
{{- end -}} {{- end -}}
{{- if .Values.ingress.tls }} {{- if .Values.ingress.tls }}
@ -41,15 +37,15 @@ spec:
- host: {{ tpl . $}} - host: {{ tpl . $}}
http: http:
paths: paths:
{{ if $extraPaths }} {{- if $extraPaths }}
{{ toYaml $extraPaths | indent 10 }} {{ toYaml $extraPaths | indent 10 }}
{{- end }} {{- end }}
- path: {{ $ingressPath }} - path: {{ $ingressPath }}
{{- if $newAPI }} {{- if $ingressSupportsPathType }}
pathType: {{ $ingressPathType }} pathType: {{ $ingressPathType }}
{{- end }} {{- end }}
backend: backend:
{{- if $newAPI }} {{- if $ingressApiIsStable }}
service: service:
name: {{ $fullName }} name: {{ $fullName }}
port: port:
@ -63,18 +59,20 @@ spec:
- http: - http:
paths: paths:
- backend: - backend:
{{- if $newAPI }} {{- if $ingressApiIsStable }}
service: service:
name: {{ $fullName }} name: {{ $fullName }}
port: port:
number: {{ $servicePort }} number: {{ $servicePort }}
pathType: {{ $ingressPathType }}
{{- else }} {{- else }}
serviceName: {{ $fullName }} serviceName: {{ $fullName }}
servicePort: {{ $servicePort }} servicePort: {{ $servicePort }}
{{- end }} {{- end }}
{{- if $ingressPath }} {{- if $ingressPath }}
path: {{ $ingressPath }} path: {{ $ingressPath }}
{{- end }} {{- end }}
{{- if $ingressSupportsPathType }}
pathType: {{ $ingressPathType }}
{{- end }}
{{- end -}} {{- end -}}
{{- end }} {{- end }}

View File

@ -1,5 +1,5 @@
{{- if and .Values.rbac.create (not .Values.rbac.useExistingRole) -}} {{- if and .Values.rbac.create (not .Values.rbac.useExistingRole) -}}
apiVersion: {{ template "rbac.apiVersion" . }} apiVersion: {{ template "grafana.rbac.apiVersion" . }}
kind: Role kind: Role
metadata: metadata:
name: {{ template "grafana.fullname" . }} name: {{ template "grafana.fullname" . }}

View File

@ -1,5 +1,5 @@
{{- if .Values.rbac.create -}} {{- if .Values.rbac.create -}}
apiVersion: {{ template "rbac.apiVersion" . }} apiVersion: {{ template "grafana.rbac.apiVersion" . }}
kind: RoleBinding kind: RoleBinding
metadata: metadata:
name: {{ template "grafana.fullname" . }} name: {{ template "grafana.fullname" . }}

View File

@ -17,7 +17,7 @@ data:
{{- if .Values.adminPassword }} {{- if .Values.adminPassword }}
admin-password: {{ .Values.adminPassword | b64enc | quote }} admin-password: {{ .Values.adminPassword | b64enc | quote }}
{{- else }} {{- else }}
admin-password: {{ randAlphaNum 40 | b64enc | quote }} admin-password: {{ template "grafana.password" . }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if not .Values.ldap.existingSecret }} {{- if not .Values.ldap.existingSecret }}

View File

@ -69,7 +69,7 @@ livenessProbe:
image: image:
repository: grafana/grafana repository: grafana/grafana
tag: 7.5.5 tag: 8.0.3
sha: "" sha: ""
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
@ -188,7 +188,7 @@ ingress:
labels: {} labels: {}
path: / path: /
# pathType is only for k8s > 1.19 # pathType is only for k8s >= 1.1=
pathType: Prefix pathType: Prefix
hosts: hosts:
@ -382,6 +382,10 @@ envFromSecret: ""
## This can be useful for auth tokens, etc ## This can be useful for auth tokens, etc
envRenderSecret: {} envRenderSecret: {}
# Inject Kubernetes services as environment variables.
# See https://kubernetes.io/docs/concepts/services-networking/connect-applications-service/#environment-variables
enableServiceLinks: true
## Additional grafana server secret mounts ## Additional grafana server secret mounts
# Defines additional mounts with secrets. Secrets must be manually created in the namespace. # Defines additional mounts with secrets. Secrets must be manually created in the namespace.
extraSecretMounts: [] extraSecretMounts: []
@ -446,13 +450,13 @@ datasources: {}
# access: proxy # access: proxy
# isDefault: true # isDefault: true
# - name: CloudWatch # - name: CloudWatch
# type: cloudwatch # type: cloudwatch
# access: proxy # access: proxy
# uid: cloudwatch # uid: cloudwatch
# editable: false # editable: false
# jsonData: # jsonData:
# authType: credentials # authType: default
# defaultRegion: us-east-1 # defaultRegion: us-east-1
## Configure notifiers ## Configure notifiers
## ref: http://docs.grafana.org/administration/provisioning/#alert-notification-channels ## ref: http://docs.grafana.org/administration/provisioning/#alert-notification-channels
@ -532,7 +536,7 @@ dashboardsConfigMaps: {}
## ##
grafana.ini: grafana.ini:
paths: paths:
data: /var/lib/grafana/data data: /var/lib/grafana/
logs: /var/log/grafana logs: /var/log/grafana
plugins: /var/lib/grafana/plugins plugins: /var/lib/grafana/plugins
provisioning: /etc/grafana/provisioning provisioning: /etc/grafana/provisioning
@ -603,7 +607,7 @@ smtp:
sidecar: sidecar:
image: image:
repository: quay.io/kiwigrid/k8s-sidecar repository: quay.io/kiwigrid/k8s-sidecar
tag: 1.10.7 tag: 1.12.2
sha: "" sha: ""
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
resources: {} resources: {}

View File

@ -16,4 +16,4 @@ name: kube-state-metrics
sources: sources:
- https://github.com/kubernetes/kube-state-metrics/ - https://github.com/kubernetes/kube-state-metrics/
type: application type: application
version: 3.1.0 version: 3.3.1

View File

@ -62,7 +62,7 @@ The upgraded chart now the following changes:
See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments: See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments:
```console ```console
helm show values kube-state-metrics/kube-state-metrics helm show values prometheus-community/kube-state-metrics
``` ```
You may also run `helm show values` on this chart's [dependencies](#dependencies) for additional options. You may also run `helm show values` on this chart's [dependencies](#dependencies) for additional options.

View File

@ -18,6 +18,6 @@ roleRef:
{{- end }} {{- end }}
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ template "kube-state-metrics.fullname" . }} name: {{ template "kube-state-metrics.serviceAccountName" . }}
namespace: {{ template "kube-state-metrics.namespace" . }} namespace: {{ template "kube-state-metrics.namespace" . }}
{{- end -}} {{- end -}}

View File

@ -13,30 +13,30 @@ metadata:
app.kubernetes.io/instance: "{{ .Release.Name }}" app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: "{{ .Release.Service }}" app.kubernetes.io/managed-by: "{{ .Release.Service }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}" app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
{{- if .Values.customLabels }} {{- if .Values.customLabels }}
{{ toYaml .Values.customLabels | indent 4 }} {{ toYaml .Values.customLabels | indent 4 }}
{{- end }} {{- end }}
spec: spec:
selector: selector:
matchLabels: matchLabels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }} app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
replicas: {{ .Values.replicas }} replicas: {{ .Values.replicas }}
{{- if .Values.autosharding.enabled }} {{- if .Values.autosharding.enabled }}
serviceName: {{ template "kube-state-metrics.fullname" . }} serviceName: {{ template "kube-state-metrics.fullname" . }}
volumeClaimTemplates: [] volumeClaimTemplates: []
{{- end }} {{- end }}
template: template:
metadata: metadata:
labels: labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }} app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}" app.kubernetes.io/instance: "{{ .Release.Name }}"
{{- if .Values.customLabels }} {{- if .Values.customLabels }}
{{ toYaml .Values.customLabels | indent 8 }} {{ toYaml .Values.customLabels | indent 8 }}
{{- end }} {{- end }}
{{- if .Values.podAnnotations }} {{- if .Values.podAnnotations }}
annotations: annotations:
{{ toYaml .Values.podAnnotations | indent 8 }} {{ toYaml .Values.podAnnotations | indent 8 }}
{{- end }} {{- end }}
spec: spec:
hostNetwork: {{ .Values.hostNetwork }} hostNetwork: {{ .Values.hostNetwork }}
serviceAccountName: {{ template "kube-state-metrics.serviceAccountName" . }} serviceAccountName: {{ template "kube-state-metrics.serviceAccountName" . }}
@ -51,7 +51,7 @@ spec:
{{- end }} {{- end }}
containers: containers:
- name: {{ .Chart.Name }} - name: {{ .Chart.Name }}
{{- if .Values.autosharding.enabled }} {{- if .Values.autosharding.enabled }}
env: env:
- name: POD_NAME - name: POD_NAME
valueFrom: valueFrom:
@ -61,159 +61,94 @@ spec:
valueFrom: valueFrom:
fieldRef: fieldRef:
fieldPath: metadata.namespace fieldPath: metadata.namespace
{{- end }} {{- end }}
args: args:
{{ if .Values.extraArgs }} {{- if .Values.extraArgs }}
{{- range .Values.extraArgs }} {{- range .Values.extraArgs }}
- {{ . }} - {{ . }}
{{- end }} {{- end }}
{{ end }} {{- end }}
{{ if .Values.collectors.certificatesigningrequests }} {{- if .Values.service.port }}
- --resources=certificatesigningrequests - --port={{ .Values.service.port | default 8080}}
{{ end }} {{- end }}
{{ if .Values.collectors.configmaps }} {{- if .Values.collectors }}
- --resources=configmaps - --resources={{ .Values.collectors | join "," }}
{{ end }} {{- end }}
{{ if .Values.collectors.cronjobs }} {{- if .Values.metricLabelsAllowlist }}
- --resources=cronjobs - --metric-labels-allowlist={{ .Values.metricLabelsAllowlist | join "," }}
{{ end }} {{- end }}
{{ if .Values.collectors.daemonsets }} {{- if .Values.metricAllowlist }}
- --resources=daemonsets - --metric-allowlist={{ .Values.metricAllowlist | join "," }}
{{ end }} {{- end }}
{{ if .Values.collectors.deployments }} {{- if .Values.metricDenylist }}
- --resources=deployments - --metric-denylist={{ .Values.metricDenylist | join "," }}
{{ end }} {{- end }}
{{ if .Values.collectors.endpoints }} {{- if .Values.namespaces }}
- --resources=endpoints - --namespaces={{ tpl (.Values.namespaces | join ",") $ }}
{{ end }} {{- end }}
{{ if .Values.collectors.horizontalpodautoscalers }} {{- if .Values.autosharding.enabled }}
- --resources=horizontalpodautoscalers
{{ end }}
{{ if .Values.collectors.ingresses }}
- --resources=ingresses
{{ end }}
{{ if .Values.collectors.jobs }}
- --resources=jobs
{{ end }}
{{ if .Values.collectors.limitranges }}
- --resources=limitranges
{{ end }}
{{ if .Values.collectors.mutatingwebhookconfigurations }}
- --resources=mutatingwebhookconfigurations
{{ end }}
{{ if .Values.collectors.namespaces }}
- --resources=namespaces
{{ end }}
{{ if .Values.collectors.networkpolicies }}
- --resources=networkpolicies
{{ end }}
{{ if .Values.collectors.nodes }}
- --resources=nodes
{{ end }}
{{ if .Values.collectors.persistentvolumeclaims }}
- --resources=persistentvolumeclaims
{{ end }}
{{ if .Values.collectors.persistentvolumes }}
- --resources=persistentvolumes
{{ end }}
{{ if .Values.collectors.poddisruptionbudgets }}
- --resources=poddisruptionbudgets
{{ end }}
{{ if .Values.collectors.pods }}
- --resources=pods
{{ end }}
{{ if .Values.collectors.replicasets }}
- --resources=replicasets
{{ end }}
{{ if .Values.collectors.replicationcontrollers }}
- --resources=replicationcontrollers
{{ end }}
{{ if .Values.collectors.resourcequotas }}
- --resources=resourcequotas
{{ end }}
{{ if .Values.collectors.secrets }}
- --resources=secrets
{{ end }}
{{ if .Values.collectors.services }}
- --resources=services
{{ end }}
{{ if .Values.collectors.statefulsets }}
- --resources=statefulsets
{{ end }}
{{ if .Values.collectors.storageclasses }}
- --resources=storageclasses
{{ end }}
{{ if .Values.collectors.validatingwebhookconfigurations }}
- --resources=validatingwebhookconfigurations
{{ end }}
{{ if .Values.collectors.verticalpodautoscalers }}
- --resources=verticalpodautoscalers
{{ end }}
{{ if .Values.collectors.volumeattachments }}
- --resources=volumeattachments
{{ end }}
{{ if .Values.namespaces }}
- --namespaces={{ tpl .Values.namespaces $ | join "," }}
{{ end }}
{{ if .Values.autosharding.enabled }}
- --pod=$(POD_NAME) - --pod=$(POD_NAME)
- --pod-namespace=$(POD_NAMESPACE) - --pod-namespace=$(POD_NAMESPACE)
{{ end }} {{- end }}
{{ if .Values.kubeconfig.enabled }} {{- if .Values.kubeconfig.enabled }}
- --kubeconfig=/opt/k8s/.kube/config - --kubeconfig=/opt/k8s/.kube/config
{{ end }} {{- end }}
{{ if .Values.selfMonitor.telemetryHost }} {{- if .Values.selfMonitor.telemetryHost }}
- --telemetry-host={{ .Values.selfMonitor.telemetryHost }} - --telemetry-host={{ .Values.selfMonitor.telemetryHost }}
{{ end }} {{- end }}
- --telemetry-port=8081 - --telemetry-port={{ .Values.selfMonitor.telemetryPort | default 8081 }}
{{- if .Values.kubeconfig.enabled }} {{- if .Values.kubeconfig.enabled }}
volumeMounts: volumeMounts:
- name: kubeconfig - name: kubeconfig
mountPath: /opt/k8s/.kube/ mountPath: /opt/k8s/.kube/
readOnly: true readOnly: true
{{- end }} {{- end }}
imagePullPolicy: {{ .Values.image.pullPolicy }} imagePullPolicy: {{ .Values.image.pullPolicy }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
ports: ports:
- containerPort: 8080 - containerPort: {{ .Values.service.port | default 8080}}
{{- if .Values.selfMonitor.enabled }} {{- if .Values.selfMonitor.enabled }}
- containerPort: 8081 - containerPort: {{ .Values.selfMonitor.telemetryPort | default 8081 }}
{{- end }} {{- end }}
livenessProbe: livenessProbe:
httpGet: httpGet:
path: /healthz path: /healthz
port: 8080 port: {{ .Values.service.port | default 8080}}
initialDelaySeconds: 5 initialDelaySeconds: 5
timeoutSeconds: 5 timeoutSeconds: 5
readinessProbe: readinessProbe:
httpGet: httpGet:
path: / path: /
port: 8080 port: {{ .Values.service.port | default 8080}}
initialDelaySeconds: 5 initialDelaySeconds: 5
timeoutSeconds: 5 timeoutSeconds: 5
{{- if .Values.resources }} {{- if .Values.resources }}
resources: resources:
{{ toYaml .Values.resources | indent 10 }} {{ toYaml .Values.resources | indent 10 }}
{{- end }} {{- end }}
{{- if .Values.containerSecurityContext }}
securityContext:
{{ toYaml .Values.containerSecurityContext | indent 10 }}
{{- end }}
{{- if .Values.imagePullSecrets }} {{- if .Values.imagePullSecrets }}
imagePullSecrets: imagePullSecrets:
{{ toYaml .Values.imagePullSecrets | indent 8 }} {{ toYaml .Values.imagePullSecrets | indent 8 }}
{{- end }} {{- end }}
{{- if .Values.affinity }} {{- if .Values.affinity }}
affinity: affinity:
{{ toYaml .Values.affinity | indent 8 }} {{ toYaml .Values.affinity | indent 8 }}
{{- end }} {{- end }}
{{- if .Values.nodeSelector }} {{- if .Values.nodeSelector }}
nodeSelector: nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }} {{ toYaml .Values.nodeSelector | indent 8 }}
{{- end }} {{- end }}
{{- if .Values.tolerations }} {{- if .Values.tolerations }}
tolerations: tolerations:
{{ toYaml .Values.tolerations | indent 8 }} {{ toYaml .Values.tolerations | indent 8 }}
{{- end }} {{- end }}
{{- if .Values.kubeconfig.enabled}} {{- if .Values.kubeconfig.enabled}}
volumes: volumes:
- name: kubeconfig - name: kubeconfig
secret: secret:
secretName: {{ template "kube-state-metrics.fullname" . }}-kubeconfig secretName: {{ template "kube-state-metrics.fullname" . }}-kubeconfig
{{- end }} {{- end }}

View File

@ -14,6 +14,6 @@ roleRef:
name: psp-{{ template "kube-state-metrics.fullname" . }} name: psp-{{ template "kube-state-metrics.fullname" . }}
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ template "kube-state-metrics.fullname" . }} name: {{ template "kube-state-metrics.serviceAccountName" . }}
namespace: {{ template "kube-state-metrics.namespace" . }} namespace: {{ template "kube-state-metrics.namespace" . }}
{{- end }} {{- end }}

View File

@ -18,169 +18,169 @@ metadata:
namespace: {{ . }} namespace: {{ . }}
{{- end }} {{- end }}
rules: rules:
{{ if $.Values.collectors.certificatesigningrequests }} {{ if has "certificatesigningrequests" $.Values.collectors }}
- apiGroups: ["certificates.k8s.io"] - apiGroups: ["certificates.k8s.io"]
resources: resources:
- certificatesigningrequests - certificatesigningrequests
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.configmaps }} {{ if has "configmaps" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- configmaps - configmaps
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.cronjobs }} {{ if has "cronjobs" $.Values.collectors }}
- apiGroups: ["batch"] - apiGroups: ["batch"]
resources: resources:
- cronjobs - cronjobs
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.daemonsets }} {{ if has "daemonsets" $.Values.collectors }}
- apiGroups: ["extensions", "apps"] - apiGroups: ["extensions", "apps"]
resources: resources:
- daemonsets - daemonsets
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.deployments }} {{ if has "deployments" $.Values.collectors }}
- apiGroups: ["extensions", "apps"] - apiGroups: ["extensions", "apps"]
resources: resources:
- deployments - deployments
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.endpoints }} {{ if has "endpoints" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- endpoints - endpoints
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.horizontalpodautoscalers }} {{ if has "horizontalpodautoscalers" $.Values.collectors }}
- apiGroups: ["autoscaling"] - apiGroups: ["autoscaling"]
resources: resources:
- horizontalpodautoscalers - horizontalpodautoscalers
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.ingresses }} {{ if has "ingresses" $.Values.collectors }}
- apiGroups: ["extensions", "networking.k8s.io"] - apiGroups: ["extensions", "networking.k8s.io"]
resources: resources:
- ingresses - ingresses
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.jobs }} {{ if has "jobs" $.Values.collectors }}
- apiGroups: ["batch"] - apiGroups: ["batch"]
resources: resources:
- jobs - jobs
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.limitranges }} {{ if has "limitranges" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- limitranges - limitranges
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.mutatingwebhookconfigurations }} {{ if has "mutatingwebhookconfigurations" $.Values.collectors }}
- apiGroups: ["admissionregistration.k8s.io"] - apiGroups: ["admissionregistration.k8s.io"]
resources: resources:
- mutatingwebhookconfigurations - mutatingwebhookconfigurations
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.namespaces }} {{ if has "namespaces" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- namespaces - namespaces
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.networkpolicies }} {{ if has "networkpolicies" $.Values.collectors }}
- apiGroups: ["networking.k8s.io"] - apiGroups: ["networking.k8s.io"]
resources: resources:
- networkpolicies - networkpolicies
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.nodes }} {{ if has "nodes" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- nodes - nodes
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.persistentvolumeclaims }} {{ if has "persistentvolumeclaims" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- persistentvolumeclaims - persistentvolumeclaims
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.persistentvolumes }} {{ if has "persistentvolumes" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- persistentvolumes - persistentvolumes
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.poddisruptionbudgets }} {{ if has "poddisruptionbudgets" $.Values.collectors }}
- apiGroups: ["policy"] - apiGroups: ["policy"]
resources: resources:
- poddisruptionbudgets - poddisruptionbudgets
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.pods }} {{ if has "pods" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- pods - pods
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.replicasets }} {{ if has "replicasets" $.Values.collectors }}
- apiGroups: ["extensions", "apps"] - apiGroups: ["extensions", "apps"]
resources: resources:
- replicasets - replicasets
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.replicationcontrollers }} {{ if has "replicationcontrollers" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- replicationcontrollers - replicationcontrollers
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.resourcequotas }} {{ if has "resourcequotas" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- resourcequotas - resourcequotas
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.secrets }} {{ if has "secrets" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- secrets - secrets
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.services }} {{ if has "services" $.Values.collectors }}
- apiGroups: [""] - apiGroups: [""]
resources: resources:
- services - services
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.statefulsets }} {{ if has "statefulsets" $.Values.collectors }}
- apiGroups: ["apps"] - apiGroups: ["apps"]
resources: resources:
- statefulsets - statefulsets
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.storageclasses }} {{ if has "storageclasses" $.Values.collectors }}
- apiGroups: ["storage.k8s.io"] - apiGroups: ["storage.k8s.io"]
resources: resources:
- storageclasses - storageclasses
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.validatingwebhookconfigurations }} {{ if has "validatingwebhookconfigurations" $.Values.collectors }}
- apiGroups: ["admissionregistration.k8s.io"] - apiGroups: ["admissionregistration.k8s.io"]
resources: resources:
- validatingwebhookconfigurations - validatingwebhookconfigurations
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.volumeattachments }} {{ if has "volumeattachments" $.Values.collectors }}
- apiGroups: ["storage.k8s.io"] - apiGroups: ["storage.k8s.io"]
resources: resources:
- volumeattachments - volumeattachments
verbs: ["list", "watch"] verbs: ["list", "watch"]
{{ end -}} {{ end -}}
{{ if $.Values.collectors.verticalpodautoscalers }} {{ if has "verticalpodautoscalers" $.Values.collectors }}
- apiGroups: ["autoscaling.k8s.io"] - apiGroups: ["autoscaling.k8s.io"]
resources: resources:
- verticalpodautoscalers - verticalpodautoscalers

View File

@ -21,7 +21,7 @@ roleRef:
{{- end }} {{- end }}
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ template "kube-state-metrics.fullname" $ }} name: {{ template "kube-state-metrics.serviceAccountName" $ }}
namespace: {{ template "kube-state-metrics.namespace" $ }} namespace: {{ template "kube-state-metrics.namespace" $ }}
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}

View File

@ -23,16 +23,16 @@ spec:
ports: ports:
- name: "http" - name: "http"
protocol: TCP protocol: TCP
port: {{ .Values.service.port }} port: {{ .Values.service.port | default 8080}}
{{- if .Values.service.nodePort }} {{- if .Values.service.nodePort }}
nodePort: {{ .Values.service.nodePort }} nodePort: {{ .Values.service.nodePort }}
{{- end }} {{- end }}
targetPort: 8080 targetPort: {{ .Values.service.port | default 8080}}
{{ if .Values.selfMonitor.enabled }} {{ if .Values.selfMonitor.enabled }}
- name: "metrics" - name: "metrics"
protocol: TCP protocol: TCP
port: {{ .Values.selfMonitor.telemetryPort | default 8081 }} port: {{ .Values.selfMonitor.telemetryPort | default 8081 }}
targetPort: 8081 targetPort: {{ .Values.selfMonitor.telemetryPort | default 8081 }}
{{ end }} {{ end }}
{{- if .Values.service.loadBalancerIP }} {{- if .Values.service.loadBalancerIP }}
loadBalancerIP: "{{ .Values.service.loadBalancerIP }}" loadBalancerIP: "{{ .Values.service.loadBalancerIP }}"

View File

@ -7,7 +7,7 @@ metadata:
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }} helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
app.kubernetes.io/managed-by: {{ .Release.Service }} app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }} app.kubernetes.io/instance: {{ .Release.Name }}
name: {{ template "kube-state-metrics.fullname" . }} name: {{ template "kube-state-metrics.serviceAccountName" . }}
namespace: {{ template "kube-state-metrics.namespace" . }} namespace: {{ template "kube-state-metrics.namespace" . }}
{{- if .Values.serviceAccount.annotations }} {{- if .Values.serviceAccount.annotations }}
annotations: annotations:

View File

@ -15,6 +15,6 @@ roleRef:
name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }} name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }}
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ template "kube-state-metrics.fullname" . }} name: {{ template "kube-state-metrics.serviceAccountName" . }}
namespace: {{ template "kube-state-metrics.namespace" . }} namespace: {{ template "kube-state-metrics.namespace" . }}
{{- end }} {{- end }}

View File

@ -88,6 +88,11 @@ securityContext:
runAsUser: 65534 runAsUser: 65534
fsGroup: 65534 fsGroup: 65534
## Specify security settings for a Container
## Allows overrides and additional options compared to (Pod) securityContext
## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container
containerSecurityContext: {}
## Node labels for pod assignment ## Node labels for pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/ ## Ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {} nodeSelector: {}
@ -109,37 +114,56 @@ podAnnotations: {}
# Ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ # Ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
podDisruptionBudget: {} podDisruptionBudget: {}
# Available collectors for kube-state-metrics. By default all available # Comma-separated list of metrics to be exposed.
# resources are enabled. # This list comprises of exact metric names and/or regex patterns.
# The allowlist and denylist are mutually exclusive.
metricAllowlist: []
# Comma-separated list of metrics not to be enabled.
# This list comprises of exact metric names and/or regex patterns.
# The allowlist and denylist are mutually exclusive.
metricDenylist: []
# Comma-separated list of additional Kubernetes label keys that will be used in the resource's
# labels metric. By default the metric contains only name and namespace labels.
# To include additional labels, provide a list of resource names in their plural form and Kubernetes
# label keys you would like to allow for them (Example: '=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...)'.
# A single '*' can be provided per resource instead to allow any labels, but that has
# severe performance implications (Example: '=pods=[*]').
metricLabelsAllowlist: []
# - namespaces=[k8s-label-1,k8s-label-n]
# Available collectors for kube-state-metrics.
# By default, all available resources are enabled, comment out to disable.
collectors: collectors:
certificatesigningrequests: true - certificatesigningrequests
configmaps: true - configmaps
cronjobs: true - cronjobs
daemonsets: true - daemonsets
deployments: true - deployments
endpoints: true - endpoints
horizontalpodautoscalers: true - horizontalpodautoscalers
ingresses: true - ingresses
jobs: true - jobs
limitranges: true - limitranges
mutatingwebhookconfigurations: true - mutatingwebhookconfigurations
namespaces: true - namespaces
networkpolicies: true - networkpolicies
nodes: true - nodes
persistentvolumeclaims: true - persistentvolumeclaims
persistentvolumes: true - persistentvolumes
poddisruptionbudgets: true - poddisruptionbudgets
pods: true - pods
replicasets: true - replicasets
replicationcontrollers: true - replicationcontrollers
resourcequotas: true - resourcequotas
secrets: true - secrets
services: true - services
statefulsets: true - statefulsets
storageclasses: true - storageclasses
validatingwebhookconfigurations: true - validatingwebhookconfigurations
verticalpodautoscalers: false - verticalpodautoscalers
volumeattachments: true - volumeattachments
# Enabling kubeconfig will pass the --kubeconfig argument to the container # Enabling kubeconfig will pass the --kubeconfig argument to the container
kubeconfig: kubeconfig:

View File

@ -14,4 +14,4 @@ maintainers:
name: prometheus-node-exporter name: prometheus-node-exporter
sources: sources:
- https://github.com/prometheus/node_exporter/ - https://github.com/prometheus/node_exporter/
version: 1.18.0 version: 1.18.2

View File

@ -22,6 +22,9 @@ spec:
{{- if $.Values.prometheus.monitor.tlsConfig }} {{- if $.Values.prometheus.monitor.tlsConfig }}
tlsConfig: {{ toYaml $.Values.prometheus.monitor.tlsConfig | nindent 8 }} tlsConfig: {{ toYaml $.Values.prometheus.monitor.tlsConfig | nindent 8 }}
{{- end }} {{- end }}
{{- if .Values.prometheus.monitor.proxyUrl }}
proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}}
{{- end }}
{{- if .Values.prometheus.monitor.scrapeTimeout }} {{- if .Values.prometheus.monitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }} scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }}
{{- end }} {{- end }}

View File

@ -8,7 +8,7 @@ metadata:
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }} labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
{{- if .Values.rbac.pspAnnotations }} {{- if .Values.rbac.pspAnnotations }}
annotations: annotations:
{{ toYaml .Values.rbac.pspAnnotations | indent 4 }} {{ toYaml .Values.rbac.pspAnnotations | indent 4 }}
{{- end}} {{- end}}
spec: spec:
privileged: false privileged: false

View File

@ -24,6 +24,10 @@ prometheus:
bearerTokenFile: bearerTokenFile:
tlsConfig: {} tlsConfig: {}
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
relabelings: [] relabelings: []
scrapeTimeout: 10s scrapeTimeout: 10s

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1

View File

@ -98,7 +98,12 @@ Allow the release namespace to be overridden for multi-namespace deployments in
{{/* Allow KubeVersion to be overridden. */}} {{/* Allow KubeVersion to be overridden. */}}
{{- define "kube-prometheus-stack.ingress.kubeVersion" -}} {{- define "kube-prometheus-stack.ingress.kubeVersion" -}}
{{- default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}} {{- $kubeVersion := default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}}
{{/* Special use case for Amazon EKS, Google GKE */}}
{{- if and (regexMatch "\\d+\\.\\d+\\.\\d+-(?:eks|gke).+" $kubeVersion) (not .Values.kubeVersionOverride) -}}
{{- $kubeVersion = regexFind "\\d+\\.\\d+\\.\\d+" $kubeVersion -}}
{{- end -}}
{{- $kubeVersion -}}
{{- end -}} {{- end -}}
{{/* Get Ingress API Version */}} {{/* Get Ingress API Version */}}

View File

@ -22,9 +22,9 @@ spec:
metricRelabelings: metricRelabelings:
{{ tpl (toYaml .Values.kubeApiServer.serviceMonitor.metricRelabelings | indent 6) . }} {{ tpl (toYaml .Values.kubeApiServer.serviceMonitor.metricRelabelings | indent 6) . }}
{{- end }} {{- end }}
{{- if .Values.kubeApiServer.relabelings }} {{- if .Values.kubeApiServer.serviceMonitor.relabelings }}
relabelings: relabelings:
{{ toYaml .Values.kubeApiServer.relabelings | indent 6 }} {{ toYaml .Values.kubeApiServer.serviceMonitor.relabelings | indent 6 }}
{{- end }} {{- end }}
tlsConfig: tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

View File

@ -26,6 +26,24 @@ spec:
relabelings: relabelings:
{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }} {{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
{{- end }} {{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.selfMonitor.enabled }}
- port: metrics
{{- if .Values.kubeStateMetrics.serviceMonitor.interval }}
interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeStateMetrics.serviceMonitor.proxyUrl}}
{{- end }}
honorLabels: true
{{- if .Values.kubeStateMetrics.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.metricRelabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
{{- end }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }} {{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
namespaceSelector: namespaceSelector:
matchNames: matchNames:

View File

@ -168,6 +168,6 @@ spec:
- {{ .Values.kubelet.namespace }} - {{ .Values.kubelet.namespace }}
selector: selector:
matchLabels: matchLabels:
app.kubernetes.io/managed-by: prometheus-operator app.kubernetes.io/name: kubelet
k8s-app: kubelet k8s-app: kubelet
{{- end}} {{- end}}

View File

@ -13,6 +13,11 @@ spec:
matchLabels: matchLabels:
app: prometheus-node-exporter app: prometheus-node-exporter
release: {{ $.Release.Name }} release: {{ $.Release.Name }}
{{- if (index .Values "prometheus-node-exporter" "namespaceOverride") }}
namespaceSelector:
matchNames:
- {{ index .Values "prometheus-node-exporter" "namespaceOverride" }}
{{- end }}
endpoints: endpoints:
- port: metrics - port: metrics
{{- if .Values.nodeExporter.serviceMonitor.interval }} {{- if .Values.nodeExporter.serviceMonitor.interval }}

View File

@ -1,4 +1,4 @@
{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if or (and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled) .Values.grafana.forceDeployDashboards }}
{{- $files := .Files.Glob "dashboards-1.14/*.json" }} {{- $files := .Files.Glob "dashboards-1.14/*.json" }}
{{- if $files }} {{- if $files }}
apiVersion: v1 apiVersion: v1

View File

@ -1,4 +1,4 @@
{{- if and .Values.grafana.enabled .Values.grafana.sidecar.datasources.enabled }} {{- if or (and .Values.grafana.enabled .Values.grafana.sidecar.datasources.enabled) .Values.grafana.forceDeployDatasources }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -20,7 +20,11 @@ data:
{{- if .Values.grafana.sidecar.datasources.defaultDatasourceEnabled }} {{- if .Values.grafana.sidecar.datasources.defaultDatasourceEnabled }}
- name: Prometheus - name: Prometheus
type: prometheus type: prometheus
{{- if .Values.grafana.sidecar.datasources.url }}
url: {{ .Values.grafana.sidecar.datasources.url }}
{{- else }}
url: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus:{{ .Values.prometheus.service.port }}/{{ trimPrefix "/" .Values.prometheus.prometheusSpec.routePrefix }} url: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus:{{ .Values.prometheus.service.port }}/{{ trimPrefix "/" .Values.prometheus.prometheusSpec.routePrefix }}
{{- end }}
access: proxy access: proxy
isDefault: true isDefault: true
jsonData: jsonData:

View File

@ -0,0 +1,610 @@
{{- /*
Generated from 'alertmanager-overview' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
metadata:
namespace: {{ template "kube-prometheus-stack.namespace" . }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
labels:
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
data:
alertmanager-overview.json: |-
{
"__inputs": [
],
"__requires": [
],
"annotations": {
"list": [
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"id": null,
"links": [
],
"refresh": "30s",
"rows": [
{
"collapse": false,
"collapsed": false,
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
},
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(alertmanager_alerts{namespace=\"$namespace\",service=\"$service\"}) by (namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
"refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Alerts",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(alertmanager_alerts_received_total{namespace=\"$namespace\",service=\"$service\"}[5m])) by (namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Received",
"refId": "A"
},
{
"expr": "sum(rate(alertmanager_alerts_invalid_total{namespace=\"$namespace\",service=\"$service\"}[5m])) by (namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Invalid",
"refId": "B"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Alerts receive rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Alerts",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": "integration",
"seriesOverrides": [
],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(alertmanager_notifications_total{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (integration,namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Total",
"refId": "A"
},
{
"expr": "sum(rate(alertmanager_notifications_failed_total{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (integration,namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Failed",
"refId": "B"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "$integration: Notifications Send Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": "integration",
"seriesOverrides": [
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (le,namespace,service,instance)\n) \n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} 99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (le,namespace,service,instance)\n) \n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Median",
"refId": "B"
},
{
"expr": "sum(rate(alertmanager_notification_latency_seconds_sum{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (namespace,service,instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Average",
"refId": "C"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "$integration: Notification Duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Notifications",
"titleSize": "h6",
"type": "row"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"alertmanager-mixin"
],
"templating": {
"list": [
{
"current": {
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"label": null,
"name": "datasource",
"options": [
],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "namespace",
"options": [
],
"query": "label_values(alertmanager_alerts, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "service",
"options": [
],
"query": "label_values(alertmanager_alerts, service)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "all",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": null,
"multi": false,
"name": "integration",
"options": [
],
"query": "label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Alertmanager / Overview",
"uid": "alertmanager-overview",
"version": 0
}
{{- end }}

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'apiserver' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'apiserver' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeApiServer.enabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeApiServer.enabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'cluster-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'cluster-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'controller-manager' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'controller-manager' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeControllerManager.enabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeControllerManager.enabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -176,10 +176,10 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A" "refId": "A"
} }
], ],
@ -282,10 +282,10 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A" "refId": "A"
} }
], ],
@ -388,10 +388,10 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A" "refId": "A"
} }
], ],

View File

@ -4,7 +4,7 @@ Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeEtcd.enabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeEtcd.enabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,6 +1,6 @@
{{- /* Added manually, can be changed in-place. */ -}} {{- /* Added manually, can be changed in-place. */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.coreDns.enabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.coreDns.enabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'k8s-resources-cluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'k8s-resources-cluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -247,7 +247,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})", "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -499,7 +499,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})", "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -594,7 +594,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}namespace{{`}}`}}", "legendFormat": "{{`{{`}}namespace{{`}}`}}",
@ -885,7 +885,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -903,7 +903,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -912,7 +912,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)", "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -921,7 +921,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1321,7 +1321,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)", "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1330,7 +1330,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)", "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1339,7 +1339,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)", "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1348,7 +1348,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)", "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2443,6 +2443,496 @@ data:
"showTitle": true, "showTitle": true,
"title": "Rate of Packets Dropped", "title": "Rate of Packets Dropped",
"titleSize": "h6" "titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"decimals": -1,
"fill": 10,
"id": 20,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}namespace{{`}}`}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 21,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}namespace{{`}}`}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Storage IO",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 22,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"sort": {
"col": 4,
"desc": true
},
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "IOPS(Reads)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "IOPS(Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "IOPS(Reads + Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "Throughput(Read)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Throughput(Write)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Throughput(Read + Write)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Namespace",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
],
"type": "string",
"unit": "short"
}
],
"targets": [
{
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F",
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Current Storage IO",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Storage IO - Distribution",
"titleSize": "h6"
} }
], ],
"schemaVersion": 14, "schemaVersion": 14,

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -78,7 +78,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -162,7 +162,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -256,7 +256,7 @@ data:
"thresholds": "70,80", "thresholds": "70,80",
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Memory Utilization (from requests)", "title": "Memory Utilisation (from requests)",
"tooltip": { "tooltip": {
"shared": false, "shared": false,
"sort": 0, "sort": 0,
@ -446,7 +446,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}", "legendFormat": "{{`{{`}}pod{{`}}`}}",
@ -697,7 +697,7 @@ data:
], ],
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -715,7 +715,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -733,7 +733,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2136,6 +2136,496 @@ data:
"showTitle": true, "showTitle": true,
"title": "Rate of Packets Dropped", "title": "Rate of Packets Dropped",
"titleSize": "h6" "titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"decimals": -1,
"fill": 10,
"id": 16,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 17,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Storage IO",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 18,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"sort": {
"col": 4,
"desc": true
},
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "IOPS(Reads)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "IOPS(Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "IOPS(Reads + Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "Throughput(Read)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Throughput(Write)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Throughput(Read + Write)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Pod",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
],
"type": "string",
"unit": "short"
}
],
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F",
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Current Storage IO",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Storage IO - Distribution",
"titleSize": "h6"
} }
], ],
"schemaVersion": 14, "schemaVersion": 14,

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'k8s-resources-node' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'k8s-resources-node' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -77,7 +77,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}", "legendFormat": "{{`{{`}}pod{{`}}`}}",
@ -312,7 +312,7 @@ data:
], ],
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -330,7 +330,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -348,7 +348,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'k8s-resources-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'k8s-resources-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -94,7 +94,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}container{{`}}`}}", "legendFormat": "{{`{{`}}container{{`}}`}}",
@ -450,7 +450,7 @@ data:
], ],
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -468,7 +468,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -486,7 +486,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -634,7 +634,7 @@ data:
], ],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Memory Usage", "title": "Memory Usage (WSS)",
"tooltip": { "tooltip": {
"shared": false, "shared": false,
"sort": 0, "sort": 0,
@ -725,7 +725,7 @@ data:
"type": "hidden" "type": "hidden"
}, },
{ {
"alias": "Memory Usage", "alias": "Memory Usage (WSS)",
"colorMode": null, "colorMode": null,
"colors": [ "colors": [
@ -1591,6 +1591,697 @@ data:
"showTitle": true, "showTitle": true,
"title": "Rate of Packets Dropped", "title": "Rate of Packets Dropped",
"titleSize": "h6" "titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"decimals": -1,
"fill": 10,
"id": 12,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
"legendLink": null,
"step": 10
},
{
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
"legendLink": null,
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "IOPS",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 13,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
"legendLink": null,
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
"legendLink": null,
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "ThroughPut",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Storage IO - Distribution(Pod - Read & Writes)",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"decimals": -1,
"fill": 10,
"id": 14,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}container{{`}}`}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 15,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 0,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}container{{`}}`}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Storage IO - Distribution(Containers)",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 16,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
],
"sort": {
"col": 4,
"desc": true
},
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "IOPS(Reads)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "IOPS(Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "IOPS(Reads + Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": -1,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "Throughput(Read)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Throughput(Write)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Throughput(Read + Write)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
"thresholds": [
],
"type": "number",
"unit": "Bps"
},
{
"alias": "Container",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "container",
"thresholds": [
],
"type": "number",
"unit": "short"
},
{
"alias": "",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
],
"type": "string",
"unit": "short"
}
],
"targets": [
{
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F",
"step": 10
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Current Storage IO",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Storage IO - Distribution",
"titleSize": "h6"
} }
], ],
"schemaVersion": 14, "schemaVersion": 14,

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -77,7 +77,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}", "legendFormat": "{{`{{`}}pod{{`}}`}}",
@ -312,7 +312,7 @@ data:
], ],
"targets": [ "targets": [
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -330,7 +330,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -348,7 +348,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -98,7 +98,7 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}workload{{`}}`}} - {{`{{`}}workload_type{{`}}`}}", "legendFormat": "{{`{{`}}workload{{`}}`}} - {{`{{`}}workload_type{{`}}`}}",
@ -396,7 +396,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -414,7 +414,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -432,7 +432,7 @@ data:
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'namespace-by-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'namespace-by-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'namespace-by-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'namespace-by-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'node-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'node-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.nodeExporter.enabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.nodeExporter.enabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.nodeExporter.enabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.nodeExporter.enabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'nodes' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'nodes' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -92,9 +92,8 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__rate_interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n",
"format": "time_series", "format": "time_series",
"interval": "$__rate_interval",
"intervalFactor": 5, "intervalFactor": 5,
"legendFormat": "{{`{{`}}cpu{{`}}`}}", "legendFormat": "{{`{{`}}cpu{{`}}`}}",
"refId": "A" "refId": "A"
@ -528,25 +527,22 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"interval": "$__rate_interval",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}device{{`}}`}} read", "legendFormat": "{{`{{`}}device{{`}}`}} read",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"interval": "$__rate_interval",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}device{{`}}`}} written", "legendFormat": "{{`{{`}}device{{`}}`}} written",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"interval": "$__rate_interval",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}device{{`}}`}} io time", "legendFormat": "{{`{{`}}device{{`}}`}} io time",
"refId": "C" "refId": "C"
@ -758,9 +754,8 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"interval": "$__rate_interval",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}device{{`}}`}}", "legendFormat": "{{`{{`}}device{{`}}`}}",
"refId": "A" "refId": "A"
@ -852,9 +847,8 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"interval": "$__rate_interval",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}device{{`}}`}}", "legendFormat": "{{`{{`}}device{{`}}`}}",
"refId": "A" "refId": "A"

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -92,14 +92,14 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "Used Space", "legendFormat": "Used Space",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "Free Space", "legendFormat": "Free Space",
@ -207,7 +207,7 @@ data:
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "max without(instance,node) (\n(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "", "legendFormat": "",
@ -289,14 +289,14 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "Used inodes", "legendFormat": "Used inodes",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": " Free inodes", "legendFormat": " Free inodes",
@ -404,7 +404,7 @@ data:
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "max without(instance,node) (\nkubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "", "legendFormat": "",

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'pod-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'pod-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'prometheus-remote-write' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'prometheus-remote-write' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.prometheus.prometheusSpec.remoteWriteDashboards }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.prometheus.prometheusSpec.remoteWriteDashboards }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'prometheus' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'prometheus' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'proxy' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'proxy' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeProxy.enabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeProxy.enabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'scheduler' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'scheduler' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeScheduler.enabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeScheduler.enabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -176,31 +176,31 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
"refId": "D" "refId": "D"
} }
], ],
@ -290,31 +290,31 @@ data:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
"refId": "D" "refId": "D"
} }
], ],

View File

@ -4,7 +4,7 @@ Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
@ -925,4 +925,4 @@ data:
"uid": "a31c1f46e6f727cb37c0d731a7245005", "uid": "a31c1f46e6f727cb37c0d731a7245005",
"version": 0 "version": 0
} }
{{- end }} {{- end }}

View File

@ -1,10 +1,10 @@
{{- /* {{- /*
Generated from 'workload-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml Generated from 'workload-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@ -58,8 +58,8 @@ spec:
tolerations: tolerations:
{{ toYaml . | indent 8 }} {{ toYaml . | indent 8 }}
{{- end }} {{- end }}
{{- if .Values.prometheusOperator.admissionWebhooks.patch.securityContext }}
securityContext: securityContext:
runAsGroup: 2000 {{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.securityContext | indent 8 }}
runAsNonRoot: true {{- end }}
runAsUser: 2000
{{- end }} {{- end }}

View File

@ -59,8 +59,8 @@ spec:
tolerations: tolerations:
{{ toYaml . | indent 8 }} {{ toYaml . | indent 8 }}
{{- end }} {{- end }}
{{- if .Values.prometheusOperator.admissionWebhooks.patch.securityContext }}
securityContext: securityContext:
runAsGroup: 2000 {{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.securityContext | indent 8 }}
runAsNonRoot: true {{- end }}
runAsUser: 2000
{{- end }} {{- end }}

View File

@ -5,8 +5,8 @@ metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-admission name: {{ template "kube-prometheus-stack.fullname" . }}-admission
{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }} {{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}
annotations: annotations:
certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-root-cert" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }} certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-admission" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }}
cert-manager.io/inject-ca-from: {{ printf "%s/%s-root-cert" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }} cert-manager.io/inject-ca-from: {{ printf "%s/%s-admission" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }}
{{- end }} {{- end }}
labels: labels:
app: {{ template "kube-prometheus-stack.name" $ }}-admission app: {{ template "kube-prometheus-stack.name" $ }}-admission

View File

@ -5,8 +5,8 @@ metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-admission name: {{ template "kube-prometheus-stack.fullname" . }}-admission
{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }} {{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}
annotations: annotations:
certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-root-cert" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }} certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-admission" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }}
cert-manager.io/inject-ca-from: {{ printf "%s/%s-root-cert" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }} cert-manager.io/inject-ca-from: {{ printf "%s/%s-admission" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }}
{{- end }} {{- end }}
labels: labels:
app: {{ template "kube-prometheus-stack.name" $ }}-admission app: {{ template "kube-prometheus-stack.name" $ }}-admission

View File

@ -78,6 +78,11 @@ spec:
{{- if .Values.prometheusOperator.prometheusInstanceNamespaces }} {{- if .Values.prometheusOperator.prometheusInstanceNamespaces }}
- --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }} - --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }}
{{- end }} {{- end }}
{{- if .Values.prometheusOperator.thanosImage.sha }}
- --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
{{- else }}
- --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
{{- end }}
{{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }} {{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }}
- --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }} - --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }}
{{- end }} {{- end }}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/alertmanager-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -26,41 +26,146 @@ spec:
groups: groups:
- name: alertmanager.rules - name: alertmanager.rules
rules: rules:
- alert: AlertmanagerConfigInconsistent - alert: AlertmanagerFailedReload
annotations: annotations:
message: 'The configuration of the instances of the Alertmanager cluster `{{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.service {{`}}`}}` are out of sync. description: Configuration has failed to load for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerfailedreload
{{`{{`}} range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query {{`}}`}} summary: Reloading an Alertmanager configuration has failed.
expr: |-
Configuration hash for pod {{`{{`}} .Labels.pod {{`}}`}} is "{{`{{`}} printf "%.f" .Value {{`}}`}}" # Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
{{`{{`}} end {{`}}`}} max_over_time(alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) == 0
for: 10m
' labels:
expr: count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})) != 1 severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerMembersInconsistent
annotations:
description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} has only found {{`{{`}} $value {{`}}`}} members of the {{`{{`}}$labels.job{{`}}`}} cluster.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagermembersinconsistent
summary: A member of an Alertmanager cluster has not found all other cluster members.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
< on (namespace,service) group_left
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]))
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerFailedToSendAlerts
annotations:
description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} failed to send {{`{{`}} $value | humanizePercentage {{`}}`}} of notifications to {{`{{`}} $labels.integration {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerfailedtosendalerts
summary: An Alertmanager instance failed to send notifications.
expr: |-
(
rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
/
rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
)
> 0.01
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerClusterFailedToSendAlerts
annotations:
description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
expr: |-
min by (namespace,service, integration) (
rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration=~`.*`}[5m])
/
rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration=~`.*`}[5m])
)
> 0.01
for: 5m for: 5m
labels: labels:
severity: critical severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }} {{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }} {{- end }}
- alert: AlertmanagerFailedReload - alert: AlertmanagerClusterFailedToSendAlerts
annotations: annotations:
message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}. description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}.
expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0 runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterfailedtosendalerts
for: 10m summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
expr: |-
min by (namespace,service, integration) (
rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration!~`.*`}[5m])
/
rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration!~`.*`}[5m])
)
> 0.01
for: 5m
labels: labels:
severity: warning severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }} {{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }} {{- end }}
- alert: AlertmanagerMembersInconsistent - alert: AlertmanagerConfigInconsistent
annotations: annotations:
message: Alertmanager has not found all other members of the cluster. description: Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have different configurations.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerconfiginconsistent
summary: Alertmanager instances within the same cluster have different configurations.
expr: |- expr: |-
alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} count by (namespace,service) (
!= on (service) GROUP_LEFT() count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) )
!= 1
for: 20m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerClusterDown
annotations:
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have been up for less than half of the last 5m.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterdown
summary: Half or more of the Alertmanager instances within the same cluster are down.
expr: |-
(
count by (namespace,service) (
avg_over_time(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) < 0.5
)
/
count by (namespace,service) (
up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
)
)
>= 0.5
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerClusterCrashlooping
annotations:
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have restarted at least 5 times in the last 10m.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclustercrashlooping
summary: Half or more of the Alertmanager instances within the same cluster are crashlooping.
expr: |-
(
count by (namespace,service) (
changes(process_start_time_seconds{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[10m]) > 4
)
/
count by (namespace,service) (
up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
)
)
>= 0.5
for: 5m for: 5m
labels: labels:
severity: critical severity: critical

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -26,7 +26,9 @@ spec:
rules: rules:
- alert: TargetDown - alert: TargetDown
annotations: annotations:
message: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.' description: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-targetdown
summary: One or more targets are unreachable.
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10 expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
for: 10m for: 10m
labels: labels:
@ -36,7 +38,7 @@ spec:
{{- end }} {{- end }}
- alert: Watchdog - alert: Watchdog
annotations: annotations:
message: 'This is an alert meant to ensure that the entire alerting pipeline is functional. description: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager This alert is always firing, therefore it should always be firing in Alertmanager
@ -47,6 +49,8 @@ spec:
"DeadMansSnitch" integration in PagerDuty. "DeadMansSnitch" integration in PagerDuty.
' '
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-watchdog
summary: An alert that should always be firing to certify that Alertmanager is working properly.
expr: vector(1) expr: vector(1)
labels: labels:
severity: none severity: none

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -26,11 +26,11 @@ spec:
rules: rules:
- expr: |- - expr: |-
sum by (cluster, namespace, pod, container) ( sum by (cluster, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
) )
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
- expr: |- - expr: |-
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
@ -77,6 +77,28 @@ spec:
) )
) )
record: namespace_cpu:kube_pod_container_resource_requests:sum record: namespace_cpu:kube_pod_container_resource_requests:sum
- expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
max by (namespace, pod, container, cluster) (
kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"}
) * on(namespace, pod, cluster) group_left() max by (namespace, pod) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
)
)
record: namespace_memory:kube_pod_container_resource_limits:sum
- expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
max by (namespace, pod, container, cluster) (
kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"}
) * on(namespace, pod, cluster) group_left() max by (namespace, pod) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
)
)
record: namespace_cpu:kube_pod_container_resource_limits:sum
- expr: |- - expr: |-
max by (cluster, namespace, workload, pod) ( max by (cluster, namespace, workload, pod) (
label_replace( label_replace(

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -29,56 +29,56 @@ spec:
1 - ( 1 - (
( (
# write too slow # write too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
- -
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
) + ) +
( (
# read too slow # read too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
- -
( (
( (
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d]))
or or
vector(0) vector(0)
) )
+ +
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d]))
+ +
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
) )
) + ) +
# errors # errors
sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
) )
/ /
sum(code:apiserver_request_total:increase30d) sum by (cluster) (code:apiserver_request_total:increase30d)
labels: labels:
verb: all verb: all
record: apiserver_request:availability30d record: apiserver_request:availability30d
- expr: |- - expr: |-
1 - ( 1 - (
sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
- -
( (
# too slow # too slow
( (
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d]))
or or
vector(0) vector(0)
) )
+ +
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d]))
+ +
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
) )
+ +
# errors # errors
sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
) )
/ /
sum(code:apiserver_request_total:increase30d{verb="read"}) sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"})
labels: labels:
verb: read verb: read
record: apiserver_request:availability30d record: apiserver_request:availability30d
@ -86,74 +86,74 @@ spec:
1 - ( 1 - (
( (
# too slow # too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
- -
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
) )
+ +
# errors # errors
sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
) )
/ /
sum(code:apiserver_request_total:increase30d{verb="write"}) sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"})
labels: labels:
verb: write verb: write
record: apiserver_request:availability30d record: apiserver_request:availability30d
- expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
record: code_verb:apiserver_request_total:increase30d record: code_verb:apiserver_request_total:increase30d
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h])) - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
labels: labels:
verb: read verb: read
record: code:apiserver_request_total:increase30d record: code:apiserver_request_total:increase30d
- expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
labels: labels:
verb: write verb: write
record: code:apiserver_request_total:increase30d record: code:apiserver_request_total:increase30d

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -28,26 +28,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate1d record: apiserver_request:burnrate1d
@ -55,26 +55,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate1h record: apiserver_request:burnrate1h
@ -82,26 +82,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate2h record: apiserver_request:burnrate2h
@ -109,26 +109,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate30m record: apiserver_request:burnrate30m
@ -136,26 +136,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate3d record: apiserver_request:burnrate3d
@ -163,26 +163,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate5m record: apiserver_request:burnrate5m
@ -190,26 +190,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate6h record: apiserver_request:burnrate6h
@ -217,15 +217,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate1d record: apiserver_request:burnrate1d
@ -233,15 +233,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate1h record: apiserver_request:burnrate1h
@ -249,15 +249,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate2h record: apiserver_request:burnrate2h
@ -265,15 +265,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate30m record: apiserver_request:burnrate30m
@ -281,15 +281,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate3d record: apiserver_request:burnrate3d
@ -297,15 +297,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate5m record: apiserver_request:burnrate5m
@ -313,32 +313,32 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate6h record: apiserver_request:burnrate6h
- expr: sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels: labels:
verb: read verb: read
record: code_resource:apiserver_request_total:rate5m record: code_resource:apiserver_request_total:rate5m
- expr: sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels: labels:
verb: write verb: write
record: code_resource:apiserver_request_total:rate5m record: code_resource:apiserver_request_total:rate5m
- expr: histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0 - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
labels: labels:
quantile: '0.99' quantile: '0.99'
verb: read verb: read
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
labels: labels:
quantile: '0.99' quantile: '0.99'
verb: write verb: write

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kube-prometheus-general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml Generated from 'kube-prometheus-general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kube-state-metrics' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml Generated from 'kube-state-metrics' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-state-metrics-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -55,5 +55,33 @@ spec:
severity: critical severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }} {{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeStateMetricsShardingMismatch
annotations:
description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsshardingmismatch
summary: kube-state-metrics sharding is misconfigured.
expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeStateMetricsShardsMissing
annotations:
description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsshardsmissing
summary: kube-state-metrics shards are missing.
expr: |-
2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1
-
sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) )
!= 0
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }} {{- end }}
{{- end }} {{- end }}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -30,7 +30,10 @@ spec:
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 10 minutes. description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 10 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
summary: Pod is crash looping. summary: Pod is crash looping.
expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m]) * 60 * 5 > 0 expr: |-
increase(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m]) > 0
and
sum without (phase) (kube_pod_status_phase{phase!="Running",job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} == 1)
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -261,19 +264,19 @@ spec:
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpareplicasmismatch runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpareplicasmismatch
summary: HPA has not matched descired number of replicas. summary: HPA has not matched descired number of replicas.
expr: |- expr: |-
(kube_hpa_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!= !=
kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}) kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and and
(kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
> >
kube_hpa_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}) kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and and
(kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
< <
kube_hpa_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}) kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and and
changes(kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0 changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -286,9 +289,9 @@ spec:
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpamaxedout runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpamaxedout
summary: HPA is running at max replicas summary: HPA is running at max replicas
expr: |- expr: |-
kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
== ==
kube_hpa_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m for: 15m
labels: labels:
severity: warning severity: warning

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/node-exporter-prometheusRule.yaml Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -33,9 +33,9 @@ spec:
record: instance:node_num_cpu:sum record: instance:node_num_cpu:sum
- expr: |- - expr: |-
1 - avg without (cpu, mode) ( 1 - avg without (cpu, mode) (
rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m]) rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[5m])
) )
record: instance:node_cpu_utilisation:rate1m record: instance:node_cpu_utilisation:rate5m
- expr: |- - expr: |-
( (
node_load1{job="node-exporter"} node_load1{job="node-exporter"}
@ -50,30 +50,30 @@ spec:
node_memory_MemTotal_bytes{job="node-exporter"} node_memory_MemTotal_bytes{job="node-exporter"}
) )
record: instance:node_memory_utilisation:ratio record: instance:node_memory_utilisation:ratio
- expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[1m]) - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])
record: instance:node_vmstat_pgmajfault:rate1m record: instance:node_vmstat_pgmajfault:rate5m
- expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m]) - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m])
record: instance_device:node_disk_io_time_seconds:rate1m record: instance_device:node_disk_io_time_seconds:rate5m
- expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m]) - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m])
record: instance_device:node_disk_io_time_weighted_seconds:rate1m record: instance_device:node_disk_io_time_weighted_seconds:rate5m
- expr: |- - expr: |-
sum without (device) ( sum without (device) (
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m]) rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m])
) )
record: instance:node_network_receive_bytes_excluding_lo:rate1m record: instance:node_network_receive_bytes_excluding_lo:rate5m
- expr: |- - expr: |-
sum without (device) ( sum without (device) (
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m]) rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m])
) )
record: instance:node_network_transmit_bytes_excluding_lo:rate1m record: instance:node_network_transmit_bytes_excluding_lo:rate5m
- expr: |- - expr: |-
sum without (device) ( sum without (device) (
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m]) rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m])
) )
record: instance:node_network_receive_drop_excluding_lo:rate1m record: instance:node_network_receive_drop_excluding_lo:rate5m
- expr: |- - expr: |-
sum without (device) ( sum without (device) (
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m]) rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m])
) )
record: instance:node_network_transmit_drop_excluding_lo:rate1m record: instance:node_network_transmit_drop_excluding_lo:rate5m
{{- end }} {{- end }}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/node-exporter-prometheusRule.yaml Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -27,6 +27,7 @@ spec:
- alert: NodeFilesystemSpaceFillingUp - alert: NodeFilesystemSpaceFillingUp
annotations: annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours. summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |- expr: |-
( (
@ -45,6 +46,7 @@ spec:
- alert: NodeFilesystemSpaceFillingUp - alert: NodeFilesystemSpaceFillingUp
annotations: annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours. summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |- expr: |-
( (
@ -63,6 +65,7 @@ spec:
- alert: NodeFilesystemAlmostOutOfSpace - alert: NodeFilesystemAlmostOutOfSpace
annotations: annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
summary: Filesystem has less than 5% space left. summary: Filesystem has less than 5% space left.
expr: |- expr: |-
( (
@ -79,6 +82,7 @@ spec:
- alert: NodeFilesystemAlmostOutOfSpace - alert: NodeFilesystemAlmostOutOfSpace
annotations: annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
summary: Filesystem has less than 3% space left. summary: Filesystem has less than 3% space left.
expr: |- expr: |-
( (
@ -95,6 +99,7 @@ spec:
- alert: NodeFilesystemFilesFillingUp - alert: NodeFilesystemFilesFillingUp
annotations: annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 24 hours. summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |- expr: |-
( (
@ -113,6 +118,7 @@ spec:
- alert: NodeFilesystemFilesFillingUp - alert: NodeFilesystemFilesFillingUp
annotations: annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 4 hours. summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |- expr: |-
( (
@ -131,6 +137,7 @@ spec:
- alert: NodeFilesystemAlmostOutOfFiles - alert: NodeFilesystemAlmostOutOfFiles
annotations: annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
summary: Filesystem has less than 5% inodes left. summary: Filesystem has less than 5% inodes left.
expr: |- expr: |-
( (
@ -147,6 +154,7 @@ spec:
- alert: NodeFilesystemAlmostOutOfFiles - alert: NodeFilesystemAlmostOutOfFiles
annotations: annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
summary: Filesystem has less than 3% inodes left. summary: Filesystem has less than 3% inodes left.
expr: |- expr: |-
( (
@ -163,6 +171,7 @@ spec:
- alert: NodeNetworkReceiveErrs - alert: NodeNetworkReceiveErrs
annotations: annotations:
description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.' description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors. summary: Network interface is reporting many receive errors.
expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01 expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
for: 1h for: 1h
@ -174,6 +183,7 @@ spec:
- alert: NodeNetworkTransmitErrs - alert: NodeNetworkTransmitErrs
annotations: annotations:
description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.' description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors. summary: Network interface is reporting many transmit errors.
expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01 expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
for: 1h for: 1h
@ -185,6 +195,7 @@ spec:
- alert: NodeHighNumberConntrackEntriesUsed - alert: NodeHighNumberConntrackEntriesUsed
annotations: annotations:
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.' description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit. summary: Number of conntrack are getting close to the limit.
expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
labels: labels:
@ -195,6 +206,7 @@ spec:
- alert: NodeTextFileCollectorScrapeError - alert: NodeTextFileCollectorScrapeError
annotations: annotations:
description: Node Exporter text file collector failed to scrape. description: Node Exporter text file collector failed to scrape.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape. summary: Node Exporter text file collector failed to scrape.
expr: node_textfile_scrape_error{job="node-exporter"} == 1 expr: node_textfile_scrape_error{job="node-exporter"} == 1
labels: labels:
@ -204,7 +216,8 @@ spec:
{{- end }} {{- end }}
- alert: NodeClockSkewDetected - alert: NodeClockSkewDetected
annotations: annotations:
message: Clock on {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 300s. Ensure NTP is configured correctly on this host. description: Clock on {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclockskewdetected
summary: Clock skew detected. summary: Clock skew detected.
expr: |- expr: |-
( (
@ -226,7 +239,8 @@ spec:
{{- end }} {{- end }}
- alert: NodeClockNotSynchronising - alert: NodeClockNotSynchronising
annotations: annotations:
message: Clock on {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host. description: Clock on {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclocknotsynchronising
summary: Clock not synchronising. summary: Clock not synchronising.
expr: |- expr: |-
min_over_time(node_timex_sync_status[5m]) == 0 min_over_time(node_timex_sync_status[5m]) == 0
@ -241,6 +255,7 @@ spec:
- alert: NodeRAIDDegraded - alert: NodeRAIDDegraded
annotations: annotations:
description: RAID array '{{`{{`}} $labels.device {{`}}`}}' on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. description: RAID array '{{`{{`}} $labels.device {{`}}`}}' on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-noderaiddegraded
summary: RAID Array is degraded summary: RAID Array is degraded
expr: node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0 expr: node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
for: 15m for: 15m
@ -252,8 +267,9 @@ spec:
- alert: NodeRAIDDiskFailure - alert: NodeRAIDDiskFailure
annotations: annotations:
description: At least one device in RAID array on {{`{{`}} $labels.instance {{`}}`}} failed. Array '{{`{{`}} $labels.device {{`}}`}}' needs attention and possibly a disk swap. description: At least one device in RAID array on {{`{{`}} $labels.instance {{`}}`}} failed. Array '{{`{{`}} $labels.device {{`}}`}}' needs attention and possibly a disk swap.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-noderaiddiskfailure
summary: Failed device in RAID array summary: Failed device in RAID array
expr: node_md_disks{state="fail"} > 0 expr: node_md_disks{state="failed"} > 0
labels: labels:
severity: warning severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }} {{- if .Values.defaultRules.additionalRuleLabels }}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -26,7 +26,8 @@ spec:
rules: rules:
- alert: NodeNetworkInterfaceFlapping - alert: NodeNetworkInterfaceFlapping
annotations: annotations:
message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}" message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworkinterfaceflapping
expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
for: 2m for: 2m
labels: labels:

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-operator-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}

View File

@ -1,5 +1,5 @@
{{- /* {{- /*
Generated from 'prometheus' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml Generated from 'prometheus' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link: Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}} */ -}}
@ -29,6 +29,7 @@ spec:
- alert: PrometheusBadConfig - alert: PrometheusBadConfig
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to reload its configuration. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to reload its configuration.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusbadconfig
summary: Failed Prometheus configuration reload. summary: Failed Prometheus configuration reload.
expr: |- expr: |-
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
@ -43,6 +44,7 @@ spec:
- alert: PrometheusNotificationQueueRunningFull - alert: PrometheusNotificationQueueRunningFull
annotations: annotations:
description: Alert notification queue of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is running full. description: Alert notification queue of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is running full.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotificationqueuerunningfull
summary: Prometheus alert notification queue predicted to run full in less than 30m. summary: Prometheus alert notification queue predicted to run full in less than 30m.
expr: |- expr: |-
# Without min_over_time, failed scrapes could create false negatives, see # Without min_over_time, failed scrapes could create false negatives, see
@ -61,6 +63,7 @@ spec:
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations: annotations:
description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.' description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuserrorsendingalertstosomealertmanagers
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
expr: |- expr: |-
( (
@ -75,28 +78,11 @@ spec:
severity: warning severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }} {{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |-
min without(alertmanager) (
rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
/
rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }} {{- end }}
- alert: PrometheusNotConnectedToAlertmanagers - alert: PrometheusNotConnectedToAlertmanagers
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not connected to any Alertmanagers. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not connected to any Alertmanagers.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotconnectedtoalertmanagers
summary: Prometheus is not connected to any Alertmanagers. summary: Prometheus is not connected to any Alertmanagers.
expr: |- expr: |-
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
@ -111,6 +97,7 @@ spec:
- alert: PrometheusTSDBReloadsFailing - alert: PrometheusTSDBReloadsFailing
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustsdbreloadsfailing
summary: Prometheus has issues reloading blocks from disk. summary: Prometheus has issues reloading blocks from disk.
expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0 expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
for: 4h for: 4h
@ -122,6 +109,7 @@ spec:
- alert: PrometheusTSDBCompactionsFailing - alert: PrometheusTSDBCompactionsFailing
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustsdbcompactionsfailing
summary: Prometheus has issues compacting blocks. summary: Prometheus has issues compacting blocks.
expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0 expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
for: 4h for: 4h
@ -133,8 +121,18 @@ spec:
- alert: PrometheusNotIngestingSamples - alert: PrometheusNotIngestingSamples
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not ingesting samples. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not ingesting samples.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotingestingsamples
summary: Prometheus is not ingesting samples. summary: Prometheus is not ingesting samples.
expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0 expr: |-
(
rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
and
(
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0
or
sum without(rule_group) (prometheus_rule_group_rules{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0
)
)
for: 10m for: 10m
labels: labels:
severity: warning severity: warning
@ -144,6 +142,7 @@ spec:
- alert: PrometheusDuplicateTimestamps - alert: PrometheusDuplicateTimestamps
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusduplicatetimestamps
summary: Prometheus is dropping samples with duplicate timestamps. summary: Prometheus is dropping samples with duplicate timestamps.
expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m for: 10m
@ -155,6 +154,7 @@ spec:
- alert: PrometheusOutOfOrderTimestamps - alert: PrometheusOutOfOrderTimestamps
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoutofordertimestamps
summary: Prometheus drops samples with out-of-order timestamps. summary: Prometheus drops samples with out-of-order timestamps.
expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m for: 10m
@ -166,15 +166,16 @@ spec:
- alert: PrometheusRemoteStorageFailures - alert: PrometheusRemoteStorageFailures
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}} description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotestoragefailures
summary: Prometheus fails to send samples to remote storage. summary: Prometheus fails to send samples to remote storage.
expr: |- expr: |-
( (
rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
/ /
( (
rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
+ +
rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) (rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
) )
) )
* 100 * 100
@ -188,13 +189,14 @@ spec:
- alert: PrometheusRemoteWriteBehind - alert: PrometheusRemoteWriteBehind
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotewritebehind
summary: Prometheus remote write is behind. summary: Prometheus remote write is behind.
expr: |- expr: |-
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
( (
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
- on(job, instance) group_right - ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
) )
> 120 > 120
@ -207,6 +209,7 @@ spec:
- alert: PrometheusRemoteWriteDesiredShards - alert: PrometheusRemoteWriteDesiredShards
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}` $labels.instance | query | first | value {{`}}`}}. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}` $labels.instance | query | first | value {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotewritedesiredshards
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
expr: |- expr: |-
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
@ -225,6 +228,7 @@ spec:
- alert: PrometheusRuleFailures - alert: PrometheusRuleFailures
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusrulefailures
summary: Prometheus is failing rule evaluations. summary: Prometheus is failing rule evaluations.
expr: increase(prometheus_rule_evaluation_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 expr: increase(prometheus_rule_evaluation_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m for: 15m
@ -236,6 +240,7 @@ spec:
- alert: PrometheusMissingRuleEvaluations - alert: PrometheusMissingRuleEvaluations
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusmissingruleevaluations
summary: Prometheus is missing rule evaluations due to slow rule group evaluation. summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: increase(prometheus_rule_group_iterations_missed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 expr: increase(prometheus_rule_group_iterations_missed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m for: 15m
@ -247,6 +252,7 @@ spec:
- alert: PrometheusTargetLimitHit - alert: PrometheusTargetLimitHit
annotations: annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because the number of targets exceeded the configured target_limit. description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because the number of targets exceeded the configured target_limit.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustargetlimithit
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m for: 15m
@ -254,5 +260,36 @@ spec:
severity: warning severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }} {{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusLabelLimitHit
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuslabellimithit
summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuserrorsendingalertstoanyalertmanager
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |-
min without (alertmanager) (
rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m])
/
rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }} {{- end }}
{{- end }} {{- end }}

View File

@ -1,63 +0,0 @@
{{- /*
Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: alertmanager.rules
rules:
- alert: AlertmanagerConfigInconsistent
annotations:
message: The configuration of the instances of the Alertmanager cluster `{{`{{`}}$labels.service{{`}}`}}` are out of sync.
expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",namespace="{{ $namespace }}",controller="alertmanager"}) by (name, job, namespace, controller), "service", "$1", "name", "(.*)") != 1
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerFailedReload
annotations:
message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerMembersInconsistent
annotations:
message: Alertmanager has not found all other members of the cluster.
expr: |-
alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
!= on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,179 +0,0 @@
{{- /*
Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: etcd
rules:
- alert: etcdInsufficientMembers
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
for: 3m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdNoLeader
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.'
expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
for: 1m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfLeaderChanges
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": instance {{`{{`}} $labels.instance {{`}}`}} has seen {{`{{`}} $value {{`}}`}} leader changes within the last hour.'
expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
/
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
> 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
/
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
> 5
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdGRPCRequestsSlow
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
> 0.15
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdMemberCommunicationSlow
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.15
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedProposals
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last hour on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighFsyncDurations
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.5
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighCommitDurations
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.25
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}'
expr: |-
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
BY (method) > 0.01
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
BY (method) > 0.05
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHTTPRequestsSlow
annotations:
message: etcd instance {{`{{`}} $labels.instance {{`}}`}} HTTP requests to {{`{{`}} $labels.method {{`}}`}} are slow.
expr: |-
histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
> 0.15
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,56 +0,0 @@
{{- /*
Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: general.rules
rules:
- alert: TargetDown
annotations:
message: '{{`{{`}} $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}} targets are down.'
expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: Watchdog
annotations:
message: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
and always fire against a receiver. There are integrations with various notification
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
'
expr: vector(1)
labels:
severity: none
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

Some files were not shown because too many files have changed in this diff Show More