{{- /*
Generated from the 'kubernetes-apps' group of https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file, first read the following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
# PrometheusRule object carrying the "kubernetes-apps" alert group.
# It is rendered only when the target Kubernetes version satisfies
# >=1.14.0-0 and <9.9.9-9 and both defaultRules.create and
# defaultRules.rules.kubernetesApps are set.
# The target version is kubeTargetVersionOverride when provided, otherwise the
# GitVersion reported through Capabilities.
# defaultRules.appNamespacesTarget is substituted into the namespace regex
# matcher of the alert expressions in this file.
apiVersion : monitoring.coreos.com/v1
kind : PrometheusRule
metadata :
# Object name: chart fullname joined with "kubernetes-apps", cut to 63
# characters, with a trailing "-" removed.
name : {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
namespace : {{ template "kube-prometheus-stack.namespace" . }}
labels :
# Chart labels first, then any user-supplied defaultRules.labels.
app : {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
# Object annotations are emitted only when defaultRules.annotations is set.
{{- if .Values.defaultRules.annotations }}
annotations :
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec :
groups :
- name : kubernetes-apps
rules :
# KubePodCrashLooping: fires when a container's restart counter has increased
# over the last 10 minutes while the container also reports a waiting status,
# and this has held for 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubePodCrashLooping
annotations :
description : Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 10 minutes.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
summary : Pod is crash looping.
expr : |-
increase(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m]) > 0
and
kube_pod_container_status_waiting{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} == 1
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubePodNotReady: fires when a pod has been in phase Pending or Unknown for
# 15 minutes. The phase series is joined per (namespace, pod) with
# kube_pod_owner restricted to owner_kind other than "Job", so pods owned by a
# Job do not match.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubePodNotReady
annotations :
description : Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready
summary : Pod has been in a non-ready state for more than 15 minutes.
expr : |-
sum by (namespace, pod) (
max by(namespace, pod) (
kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}
) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (
1 , max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})
)
) > 0
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeDeploymentGenerationMismatch: fires when a Deployment's observed
# generation has differed from its metadata generation for 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeDeploymentGenerationMismatch
annotations :
description : Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch
summary : Deployment generation mismatch due to possible roll-back
expr : |-
kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeDeploymentReplicasMismatch: fires when a Deployment's spec replica count
# is greater than its available replica count and the updated-replica count
# has not changed over the last 10 minutes, sustained for 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeDeploymentReplicasMismatch
annotations :
description : Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch
summary : Deployment has not matched the expected number of replicas.
expr : |-
(
kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
>
kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
) and (
changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m])
==
0
)
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeStatefulSetReplicasMismatch: fires when a StatefulSet's ready replica
# count differs from its status replica count and the updated-replica count
# has not changed over the last 10 minutes, sustained for 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeStatefulSetReplicasMismatch
annotations :
description : StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch
summary : StatefulSet has not matched the expected number of replicas.
expr : |-
(
kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
) and (
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m])
==
0
)
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeStatefulSetGenerationMismatch: fires when a StatefulSet's observed
# generation has differed from its metadata generation for 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeStatefulSetGenerationMismatch
annotations :
description : StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch
summary : StatefulSet generation mismatch due to possible roll-back
expr : |-
kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeStatefulSetUpdateNotRolledOut: the expression combines three conditions.
#   1. current-revision series that have no matching update-revision series
#      (the "unless"), aggregated with max without (revision);
#   2. the StatefulSet replica count differs from its updated-replica count;
#   3. the updated-replica count has not changed over the last 5 minutes.
# The alert fires once the combined expression has held for 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeStatefulSetUpdateNotRolledOut
annotations :
description : StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout
summary : StatefulSet update has not been rolled out.
expr : |-
(
max without (revision) (
kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
unless
kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
)
*
(
kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
)
) and (
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m])
==
0
)
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeDaemonSetRolloutStuck: fires when, for 15 minutes, at least one of the
# following holds for a DaemonSet and its updated-scheduled count has not
# changed over the last 5 minutes:
#   - current number scheduled differs from desired number scheduled;
#   - number misscheduled is not 0;
#   - updated number scheduled differs from desired number scheduled;
#   - number available differs from desired number scheduled.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeDaemonSetRolloutStuck
annotations :
description : DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck
summary : DaemonSet rollout is stuck.
expr : |-
(
(
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
) or (
kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
0
) or (
kube_daemonset_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
) or (
kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
)
) and (
changes(kube_daemonset_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m])
==
0
)
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeContainerWaiting: fires when the waiting-reason series summed per
# (namespace, pod, container) has been above 0 for 1 hour.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeContainerWaiting
annotations :
description : Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontainerwaiting
summary : Pod container waiting longer than 1 hour
expr : sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}) > 0
for : 1h
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeDaemonSetNotScheduled: fires when a DaemonSet's desired number scheduled
# minus its current number scheduled has been above 0 for 10 minutes. The
# difference is the value reported in the description.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeDaemonSetNotScheduled
annotations :
description : '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled
summary : DaemonSet pods are not scheduled.
expr : |-
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
-
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for : 10m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeDaemonSetMisScheduled: fires when a DaemonSet's misscheduled count has
# been above 0 for 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeDaemonSetMisScheduled
annotations :
description : '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled
summary : DaemonSet pods are misscheduled.
expr : kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeJobCompletion: fires when a Job's spec completions minus its succeeded
# count has been above 0 for 12 hours.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeJobCompletion
annotations :
description : Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than 12 hours to complete.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion
summary : Job did not complete in time
expr : kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for : 12h
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeJobFailed: fires when kube_job_failed has been above 0 for a Job for
# 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeJobFailed
annotations :
description : Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed
summary : Job failed to complete.
expr : kube_job_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeHpaReplicasMismatch: fires when, for 15 minutes, an HPA's desired replica
# count differs from its current replica count while the current count is
# strictly between the configured min and max replicas and has not changed
# over the last 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeHpaReplicasMismatch
annotations :
description : HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpareplicasmismatch
summary : HPA has not matched desired number of replicas.
expr : |-
(kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
>
kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
<
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# KubeHpaMaxedOut: fires when an HPA's current replica count has been equal to
# its configured max replicas for 15 minutes.
# Labels: severity=warning plus any defaultRules.additionalRuleLabels.
- alert : KubeHpaMaxedOut
annotations :
description : HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.
runbook_url : {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpamaxedout
summary : HPA is running at max replicas
expr : |-
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
==
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for : 15m
labels :
severity : warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}