{{- /*
PrometheusRule carrying the cert-manager alerts (groups "cert-manager" and
"certificates"). Rendered only when
cert-manager.prometheus.servicemonitor.enabled is set in the chart values.

Alert annotations contain Prometheus-side template expressions. Their double
braces are emitted through backtick raw strings so that Helm prints them
literally instead of evaluating them at chart render time.

NOTE(review): the dashboard_url annotations point at grafana.example.com,
which looks like a placeholder host; confirm or make it configurable.
*/ -}}
{{- if index .Values "cert-manager" "prometheus" "servicemonitor" "enabled" }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Kubernetes object names are limited to 63 characters and must not end in "-".
  name: {{ printf "%s-%s" (include "kubezero-lib.fullname" $) "prometheus-rules" | trunc 63 | trimSuffix "-" | quote }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "kubezero-lib.labels" . | nindent 4 }}
spec:
  groups:
    - name: cert-manager
      rules:
        # Fires when no "up" series exists for the cert-manager job.
        - alert: CertManagerAbsent
          annotations:
            description: >-
              New certificates will not be able to be minted, and existing ones
              can't be renewed until cert-manager is back.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent
            summary: Cert Manager has disappeared from Prometheus service discovery.
          expr: absent(up{job="cert-manager"})
          for: 10m
          labels:
            severity: critical

    - name: certificates
      rules:
        # Fires when a certificate is less than 21 days away from expiry.
        - alert: CertManagerCertExpirySoon
          annotations:
            dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
            description: >-
              The domain that this cert covers will be unavailable after
              {{`{{`}} $value | humanizeDuration {{`}}`}}. Clients using endpoints that
              this cert protects will start to fail in
              {{`{{`}} $value | humanizeDuration {{`}}`}}.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon
            summary: >-
              The cert `{{`{{`}} $labels.name {{`}}`}}` is
              {{`{{`}} $value | humanizeDuration {{`}}`}} from expiry, it should have
              renewed over a week ago.
          # Literal block scalar: the trailing "#" text is a PromQL comment,
          # not a YAML comment.
          expr: |
            avg by (exported_namespace, namespace, name) (
              certmanager_certificate_expiration_timestamp_seconds - time()
            ) < (21 * 24 * 3600) # 21 days in seconds
          for: 1h
          labels:
            severity: warning

        # Fires when a certificate reports a ready condition other than "True".
        - alert: CertManagerCertNotReady
          annotations:
            dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
            description: >-
              This certificate has not been ready to serve traffic for at least
              10m. If the cert is being renewed or there is another valid cert,
              the ingress controller _may_ be able to serve that instead.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready
            summary: The cert `{{`{{`}} $labels.name {{`}}`}}` is not ready to serve traffic.
          expr: |
            max by (name, exported_namespace, namespace, condition) (
              certmanager_certificate_ready_status{condition!="True"} == 1
            )
          for: 10m
          labels:
            severity: critical

        # Fires when ACME client requests are answered with HTTP 429.
        - alert: CertManagerHittingRateLimits
          annotations:
            dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
            description: >-
              Depending on the rate limit, cert-manager may be unable to
              generate certificates for up to a week.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits
            summary: Cert manager hitting LetsEncrypt rate limits.
          expr: |
            sum by (host) (
              rate(certmanager_http_acme_client_request_count{status="429"}[5m])
            ) > 0
          for: 5m
          labels:
            severity: critical
{{- end }}