diff --git a/charts/kubezero-cert-manager/.gitignore b/charts/kubezero-cert-manager/.gitignore new file mode 100644 index 00000000..22d0d82f --- /dev/null +++ b/charts/kubezero-cert-manager/.gitignore @@ -0,0 +1 @@ +vendor diff --git a/charts/kubezero-cert-manager/.helmignore b/charts/kubezero-cert-manager/.helmignore index 0e8a0eb3..2f309f27 100644 --- a/charts/kubezero-cert-manager/.helmignore +++ b/charts/kubezero-cert-manager/.helmignore @@ -21,3 +21,5 @@ .idea/ *.tmproj .vscode/ +vendor +rules diff --git a/charts/kubezero-cert-manager/Chart.yaml b/charts/kubezero-cert-manager/Chart.yaml index 4867131d..c99c7c61 100644 --- a/charts/kubezero-cert-manager/Chart.yaml +++ b/charts/kubezero-cert-manager/Chart.yaml @@ -2,7 +2,8 @@ apiVersion: v2 name: kubezero-cert-manager description: KubeZero Umbrella Chart for cert-manager type: application -version: 1.5.3 +version: 0.7.3 +appVersion: 1.5.3 home: https://kubezero.com icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png keywords: diff --git a/charts/kubezero-cert-manager/README.md b/charts/kubezero-cert-manager/README.md index 90a8b176..3ff2e6d9 100644 --- a/charts/kubezero-cert-manager/README.md +++ b/charts/kubezero-cert-manager/README.md @@ -1,6 +1,6 @@ # kubezero-cert-manager -![Version: 1.5.3](https://img.shields.io/badge/Version-1.5.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.7.3](https://img.shields.io/badge/Version-0.7.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.5.3](https://img.shields.io/badge/AppVersion-1.5.3-informational?style=flat-square) KubeZero Umbrella Chart for cert-manager @@ -39,6 +39,7 @@ If your resolvers need additional sercrets like CloudFlare API tokens etc. make | cert-manager.ingressShim.defaultIssuerName | string | `"letsencrypt-dns-prod"` | | | cert-manager.nodeSelector."node-role.kubernetes.io/master" | string | `""` | | | cert-manager.prometheus.servicemonitor.enabled | bool | `false` | | +| cert-manager.startupapicheck.enabled | bool | `false` | | | cert-manager.tolerations[0].effect | string | `"NoSchedule"` | | | cert-manager.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | | | cert-manager.webhook.nodeSelector."node-role.kubernetes.io/master" | string | `""` | | diff --git a/charts/kubezero-cert-manager/cert-manager-rules.yaml b/charts/kubezero-cert-manager/cert-manager-rules.yaml new file mode 100644 index 00000000..2a72ef21 --- /dev/null +++ b/charts/kubezero-cert-manager/cert-manager-rules.yaml @@ -0,0 +1,3 @@ +rules: +- name: prometheus-rules + url: file://rules/cert-manager-mixin-prometheusRule diff --git a/charts/kubezero-cert-manager/dashboards.yaml b/charts/kubezero-cert-manager/dashboards.yaml new file mode 100644 index 00000000..db3fa8f4 --- /dev/null +++ b/charts/kubezero-cert-manager/dashboards.yaml @@ -0,0 +1,9 @@ +configmap: cert-manager-grafana-dashboard +gzip: true +folder: KubeZero +condition: 'index .Values "cert-manager" "prometheus" "servicemonitor" "enabled"' +dashboards: +- name: cert-manager + url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/raw/master/dashboards/cert-manager.json + tags: + - cert-manager diff --git a/charts/kubezero-cert-manager/jsonnetfile.json b/charts/kubezero-cert-manager/jsonnetfile.json new file mode 100644 index 00000000..854f92c0 --- /dev/null +++ b/charts/kubezero-cert-manager/jsonnetfile.json @@ -0,0 +1,24 @@ +{ + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/prometheus-operator/kube-prometheus.git", + "subdir": "jsonnet/kube-prometheus" + } + }, + "version": "main" + }, + { + "source": { + "git": { + "remote": "https://gitlab.com/uneeq-oss/cert-manager-mixin.git", + "subdir": "" + } + }, + "version": "master" + } + ], + "legacyImports": true +} diff --git a/charts/kubezero-cert-manager/jsonnetfile.lock.json b/charts/kubezero-cert-manager/jsonnetfile.lock.json new file mode 100644 index 00000000..31e99f65 --- /dev/null +++ b/charts/kubezero-cert-manager/jsonnetfile.lock.json @@ -0,0 +1,170 @@ +{ + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/brancz/kubernetes-grafana.git", + "subdir": "grafana" + } + }, + "version": "c3b14b24b83cfe9abf1064649d19e2d679f033fb", + "sum": "YrE4DNQsWgYWs6h0j/FjQETt8xDXdYdsslb1WK7xQEk=" + }, + { + "source": { + "git": { + "remote": "https://github.com/etcd-io/etcd.git", + "subdir": "contrib/mixin" + } + }, + "version": "3df272774672366beb02c5447782805ab5fec957", + "sum": "5XhYOigrKipOWDbIn9hlrz7JcbelzvJnormxSaup9JI=" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet-lib.git", + "subdir": "grafonnet" + } + }, + "version": "19b27b272abf4263af1365ec485784c49815a332", + "sum": "gF8foHByYcB25jcUOBqP6jxk0OPifQMjPvKY0HaCk6w=" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "grafana-builder" + } + }, + "version": "b7eae75972a369bf8ebfb03dcb0d4c14464ef85a", + "sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8=" + }, + { + "source": { + "git": { + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", + "subdir": "" + } + }, + "version": "ff4641bcd83314c955150bea6b147df9ca335c4a", + "sum": "oUVGwcCbmdH8qz9B+lbRawI9s23GY9HeW7MwYZRbZ/0=" + }, + { + "source": { + "git": { + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", + "subdir": "lib/promgrafonnet" + } + }, + "version": "ff4641bcd83314c955150bea6b147df9ca335c4a", + "sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps=" + }, + { + "source": { + "git": { + "remote": "https://github.com/kubernetes/kube-state-metrics.git", + "subdir": "jsonnet/kube-state-metrics" + } + }, + "version": "8dab6f7472c26987ab7f8899a4a2f753fed8e8a8", + "sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE=" + }, + { + "source": { + "git": { + "remote": "https://github.com/kubernetes/kube-state-metrics.git", + "subdir": "jsonnet/kube-state-metrics-mixin" + } + }, + "version": "8dab6f7472c26987ab7f8899a4a2f753fed8e8a8", + "sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk=" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus-operator/kube-prometheus.git", + "subdir": "jsonnet/kube-prometheus" + } + }, + "version": "a2eee1803a074fb40cad109d690732c22f0130cf", + "sum": "kqVnoNBux2YF1s03m+O3w/5jreAnjXx2/NjvNP1Hoy4=" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus-operator/prometheus-operator.git", + "subdir": "jsonnet/mixin" + } + }, + "version": "42fc15967e35e0cca68cf935f844086edbc82d0e", + "sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=", + "name": "prometheus-operator-mixin" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus-operator/prometheus-operator.git", + "subdir": "jsonnet/prometheus-operator" + } + }, + "version": "42fc15967e35e0cca68cf935f844086edbc82d0e", + "sum": "sECNXs/aIEreFUma1BWVyknBygqh3AVJEB3msmrAYYY=" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus/alertmanager.git", + "subdir": "doc/alertmanager-mixin" + } + }, + "version": "e35efbddb66a73fd8723be5334477e76f21fbd19", + "sum": "pep+dHzfIjh2SU5pEkwilMCAT/NoL6YYflV4x8cr7vU=", + "name": "alertmanager" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus/node_exporter.git", + "subdir": "docs/node-mixin" + } + }, + "version": "0e6b23c338e98809c9872c70a2f5dfa8d6d370d4", + "sum": "MnfAA4+l2BkgJncnYfV8uHC7CxHZut8+ap8KkEqyB5Y=" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus/prometheus.git", + "subdir": "documentation/prometheus-mixin" + } + }, + "version": "a05b510fc32c3ecc2fc369002576179ae1cbcc23", + "sum": "m4VHwft4fUcxzL4+52lLZG/V5aH5ZEdjaweb88vISL0=", + "name": "prometheus" + }, + { + "source": { + "git": { + "remote": "https://github.com/thanos-io/thanos.git", + "subdir": "mixin" + } + }, + "version": "360b39e1c6ab3ac8dcefa225a6205142f9362c68", + "sum": "Og+wEHfgzXBvBLAeeQvGNoiCw3FY4LQHlJdpsG/owj8=", + "name": "thanos-mixin" + }, + { + "source": { + "git": { + "remote": "https://gitlab.com/uneeq-oss/cert-manager-mixin.git", + "subdir": "" + } + }, + "version": "eae22f642aaa5d422e4766f6811df2158fc05539", + "sum": "DOg3fzS0OWrjjRPVsKgxID/rk9AC3ESQ4gDELc2RNgM=" + } + ], + "legacyImports": false +} diff --git a/charts/kubezero-cert-manager/rules.jsonnet b/charts/kubezero-cert-manager/rules.jsonnet new file mode 100644 index 00000000..1302a5c2 --- /dev/null +++ b/charts/kubezero-cert-manager/rules.jsonnet @@ -0,0 +1,9 @@ +local addMixin = (import 'kube-prometheus/lib/mixin.libsonnet'); + +local certManagerMixin = addMixin({ + name: 'cert-manager', + mixin: (import 'gitlab.com/uneeq-oss/cert-manager-mixin/mixin.libsonnet') + }); + +{ 'cert-manager-mixin-prometheusRule': certManagerMixin.prometheusRules } + diff --git a/charts/kubezero-cert-manager/rules/cert-manager-mixin-prometheusRule b/charts/kubezero-cert-manager/rules/cert-manager-mixin-prometheusRule new file mode 100644 index 00000000..7e858081 --- /dev/null +++ b/charts/kubezero-cert-manager/rules/cert-manager-mixin-prometheusRule @@ -0,0 +1,80 @@ +{ + "apiVersion": "monitoring.coreos.com/v1", + "kind": "PrometheusRule", + "metadata": { + "labels": { + "prometheus": "k8s" + }, + "name": "cert-manager", + "namespace": "monitoring" + }, + "spec": { + "groups": [ + { + "name": "cert-manager", + "rules": [ + { + "alert": "CertManagerAbsent", + "annotations": { + "description": "New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back.", + "runbook_url": "https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent", + "summary": "Cert Manager has dissapeared from Prometheus service discovery." + }, + "expr": "absent(up{job=\"cert-manager\"})", + "for": "10m", + "labels": { + "severity": "critical" + } + } + ] + }, + { + "name": "certificates", + "rules": [ + { + "alert": "CertManagerCertExpirySoon", + "annotations": { + "dashboard_url": "https://grafana.example.com/d/TvuRo2iMk/cert-manager", + "description": "The domain that this cert covers will be unavailable after {{ $value | humanizeDuration }}. Clients using endpoints that this cert protects will start to fail in {{ $value | humanizeDuration }}.", + "runbook_url": "https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon", + "summary": "The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry, it should have renewed over a week ago." + }, + "expr": "avg by (exported_namespace, namespace, name) (\n certmanager_certificate_expiration_timestamp_seconds - time()\n) < (21 * 24 * 3600) # 21 days in seconds\n", + "for": "1h", + "labels": { + "severity": "warning" + } + }, + { + "alert": "CertManagerCertNotReady", + "annotations": { + "dashboard_url": "https://grafana.example.com/d/TvuRo2iMk/cert-manager", + "description": "This certificate has not been ready to serve traffic for at least 10m. If the cert is being renewed or there is another valid cert, the ingress controller _may_ be able to serve that instead.", + "runbook_url": "https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready", + "summary": "The cert `{{ $labels.name }}` is not ready to serve traffic." + }, + "expr": "max by (name, exported_namespace, namespace, condition) (\n certmanager_certificate_ready_status{condition!=\"True\"} == 1\n)\n", + "for": "10m", + "labels": { + "severity": "critical" + } + }, + { + "alert": "CertManagerHittingRateLimits", + "annotations": { + "dashboard_url": "https://grafana.example.com/d/TvuRo2iMk/cert-manager", + "description": "Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week.", + "runbook_url": "https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits", + "summary": "Cert manager hitting LetsEncrypt rate limits." + }, + "expr": "sum by (host) (\n rate(certmanager_http_acme_client_request_count{status=\"429\"}[5m])\n) > 0\n", + "for": "5m", + "labels": { + "severity": "critical" + } + } + ] + } + ] + } +} diff --git a/charts/kubezero-cert-manager/templates/grafana-dashboards.yaml b/charts/kubezero-cert-manager/templates/grafana-dashboards.yaml new file mode 100644 index 00000000..0c228299 --- /dev/null +++ b/charts/kubezero-cert-manager/templates/grafana-dashboards.yaml @@ -0,0 +1,15 @@ +{{- if index .Values "cert-manager" "prometheus" "servicemonitor" "enabled" }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ printf "%s-%s" (include "kubezero-lib.fullname" $) "cert-manager-grafana-dashboard" | trunc 63 | trimSuffix "-" }} + namespace: {{ .Release.Namespace }} + labels: + grafana_dashboard: "1" + {{- include "kubezero-lib.labels" . | nindent 4 }} + annotations: + k8s-sidecar-target-directory: KubeZero +binaryData: + cert-manager.json.gz: + H4sIAAAAAAAC/+1d+2/bOBL+vX8FoT0cmoWTWG6SJgfsAUmuuRZou0GTFthrCoOWaJsbmdSSVBJvkP/9htSLelh2/GjTVvnBsUiKjxnON9+MHr5/hpCDGeMKK8qZdP6F7qEICgMqFRx9NkcoKTU1g4gG6g2DSreTl/pYYckj4RGocM4FnxA1JpF0rDaE4UGg65WIiFU+pn5NKfU4O+UBF7pDMRrg590O6rkufOzvd5C7ZXfN8MQMfJyvBf0THQdEqMIU1DQ07XwsxwOOhe8kdQ/m/xf4fNDNHZ9IT9BQ96Tbmz4c4lNVWoIzYkS98aGERUEQlwgcji85DxQNUyk5VDfZP4q/KyJw0rN74B7s7e0fHvVednumNqDsWivi8xdzGGJGApmpIlXEfIGXl3A5JjDHyYAIxIfIA8nQIfWwIhJRhuBcJAj2p0iC/MhO3s2QksA/5WxIR9n2SPof4ihQslAK5V4kFZ/o0oeOXT7BYUjZKF9brpWxIHLMA7/clz6Lm83h4IHkQaSI0ynWS0VCae3U9O++dKwnlu6mkSCElToyLW5wEJFEl6Xah84jBhDEb+reLfddOP7yrK7GGt/hN0QIMJryusuiw8obE1GRabodncH0vbab8kwdHqZo4FzCPneeNQjCCQUP9WYiC2ohHtpSeYOgqqfP3xFz9sWseS26R+bvlDqdVjVb32qRnZCWZoqAuVL/nBeNxxnD4aG1aW71xutZBXdQ0LWOp/q40rdRV3aatTWsoYzQ3iVqiSVj9WvwMK3FADJ25Z8AFnQ4zaojxe1qLihhKgXLSjXYWeSR32vmpGeFA6+6A5wAS/Weq/egOHtjFxHJgJ7MkL+odl0+xIEktcapyJ0qgZ8pKyyxIuYwiEaUfSJCJkt9ubO3s5+jsMJiRFRxOYXVkrvQ7FsZTdBgip6D9wRvBZ1twXewzwlmeERE3wL+vgH8vgb8SG4VF0oZOClYbVUEARkR5p9xAfCia+/vs6EeHopNBRka1+gc55JO9/GXfGl0Qs6E8RipD83KL8Z0qKoVyvhg59R2Yh/0YiyBJZ5ery4ePRH2Izzo2lyfTzw6wcaNu0/UJ85xWat6xF9eHZ8cvjhqGuKgu3fY7a4yyFy/7va6RwfVMWaDbkElEaNmx/vKj0RCmOf753XCdaGgAa/3Wrz+XvB6QtlMfIY2NN5ofQ2GAGWTsC+JxluJ/o26W2gb6YrnJfBOAiqz5hKqQx9MVYKtRwH+AhjfaV7zDfEUF8/d3n4X/uonXzPBzc39ZDP+6YJzBlpD2k+hV1qZa3ZQpRDvwnhyHd7pcM4O8XbQJ2MJCOwZaU7fQWd6dyAu0Ed2zfgtW0/IV7RdHNARK4knGyPQYXAcTRets4y7tpN81uQLnGGsJqca0hhY7NZEOjoUj71H9aQEGpw/iKyp5DNOivXqVipSJ+SU4qmZoPazhcWHqzpmBubmfOuI2fBQFJviJiLn2NZ2bqmvxo1cZ1meswGZAPhQH4BG0WATIjH6bxCFRlJyCXB9LN9I7rRiWVEsXyUVcVhPbY+aqS3g5e1rsECjmVJGWXKhTqbNCKA9ah110lVUhgGOtTpbeU3CWRtpDPCABP0/OXBHfDMy8b7OgHeQ/pQh9uBrFpV3EJwGSyd+P6teMCmAfvsNuVsddOVkZ8adXDm6cDv+l9XFh9XR8mZXTontDTNqFmfWnyJdXUTajxJxE6/fQr9q6O7+kEI/2XweyLImgZmMl5rgxAyritFSEohd5SU/BT41YUXCV4s18dWw6Zkmy4Z9FLXlzEvj18yBixFm9G+y4ODkzgsin5ykmFTxRBrcaxSZ1SG3sbY3q9YEE+iX41n11d1Yj6m1Tc2kFm48c466yez1lZU1r1k2TqOfpswnd83qMFM6nC1vt9sk7YNZlSf68mKlMvMBdk5onujdRcXuurOFflRfpU97MVvKtTFaVVUvG/SkB+81KkkQ3XoBLdUEd5a0CwSgUfLFkGAxY7ExDb3PKhbV4Qqn9+pXnl5htzp2Zut4frNsoQ2zq2receao3upuZpT90OSEkqxM7Bhr0zKgdizNnQmykDd0BljIMnSZOw3eEjZS44Jxm3JS13yJy/pCa1lf1OdMCR4ERCA5ZR4S5K8I2IXcxNX8/DaFZVLgQxoENtSYgv8K7Ov0sY0C60yWVyOKMfV9wi6M56+owrjkl/mhRX56XUshMfcpThB4YtWJgSyFiNdXrpngu7pSympKdYhTDW4UV2Zu5da1GfBcAqBGU2v3Zgrj/IKtIpjjf2B3ngMPNtukt59rSfMyU5FmzXWB0xyqYX13zmWa4yq6VytmImALTAGHrihoXjgV6gnpPRXJwjXsMF1BsTtQDYSNJnB0hgHPQ+eEGv5e2NJ5pQabOgsHGu1dV0bRibmQ+G9BxJW6ZS7yJgYP0cYVQwYJimFH1qKvIaHv4SCAwoipz//o93Xzfrqxv2xdsdWuAicjLXkZ2M52fl4sLPhARim1//KYeCHHyQuNkx8SnNyFGMyKH7JbuAr5BayzmLW5hVISw1heP3UoQA3pDfUjHFQvJqVtzJW3fAJ3+I6WbGYQedfxDimm1LN8sF58zY1xpdb1EJJBRQ1gT/EdadiYeUQJPicsBU8mfq65CuAEfHSCJam5Kn5X1zyGQ6db7N1ay9woK58mnCXUJqdZKa6dZ8UKplWdpxdRyshuyt+Sm2zShVsZnzZr+ZAwlpShIMXR8em7VygUHKyEiJ+TspRzoO7BEpTl4OtQFn2d9tUkVNP6G3r/RwRviU5LdNZDdDSecL+DQqzGHZTcsTaD8oyVCvvYm5C+F2jb7CcgsxnaE8/s4QHd3+vJmS/x/J48DTKA+/ry8rwlQC0BagnQVyVAx4CxgFYovZOwmQyhNoGzNB2ylGfj+ouWD7V86EfjQ4tzoRR30svOfei5hhoBN9q9YpsbdRYlawmZERjS9xukQmuJ2WrErCVlT56Ulf0i8CpxvT0lQQCzrGj+mxC30/OP6KPU1A0zHwV0QpXsICxR4q80kcPoRjfzuCBPl7p110jdDubksXp7yxC3Xi1xcyc/36W34jPhLSkrk7J6XDXgkths6Qa4UvRSZwb7c3E76383cdc7v+4Wx8kffzs7Ozp8UfIRGSQt5CXy0d5q0Gkaq7d3tH+60FgVpP8WpDa9qTLkfsYmOWwy6FT0vTDqRxpvM85ozO0+a/HblaO553ZCPq+ch/l80roBElhJP95RCz9DNYuLpjVn2IsfKek1UlXtIoCjaq66+j2qRRleRwPSh6/9XI5g2MbN9WOXZeSqHZRskuTSYnvEraMVqbmNUjObf47cTjYhtzQ7891KLo0pmmV3+lSu0VuY3QY8SwU8ibOuPAbShj5t6LNM6HOeRzj6+VK9v+ELVuZIezMq0YBQNoICwRUYsr+DXtPRmAhThX3D6H7OkOhgLSHRXhsSrSkkAt/NiKesJ1J/vrhol9zBJmI42PaZrAQUjwySnmAokd8eUAgnvCEEESlC9UF1lC8ZVugBdhtGWa3vpxi0LB6wfGPyiC5jDYM7anlkyyNbHvl0eOQ7MuFiiiJFAyrjux/yXPrOz0gOK3c6LMcOD1p22CbM10EMYwvdaM48yUZ9hXy5yRj+mLnynHFOjMaSTPlgqjaUnVyJWTanJxOv8O0y4okINyC8Hyj5/R0I6fvKc5egtg1RlgpRzH5sg5M2OFk5OHlP1C0X14iyEWCA3CXm3/f3ONr+GqKSw7VEJYdtVDI7KmkjjyJZNy9tqvD17FVORkJkhBW9Idt/OE+PnJucs75f+paqMY8Uem42/TB+IZiurCSLWYw4wLE8AsuKuVWSMs5ebrJwOvqJ5o2Txa2F1q8g43R7/ZhCrjeeBd++9vUob+JiW867Guc92exjhotRyZby6s/4p5rA0LQN2U+XOdIbkwnOXW3vZVyspgFJCXDcEjx1rvYCDjnZCIpMwgCcHxst8ktdOVkq6EaSIL7wXYdF6Suvw7pf8LLfHWs1qKWvRVpdsgrgAwbOSsUJQtqmTZl5xeFxUMfM0o3k6MQuuohpvm2qQMdpzXnpy8Ss4MAm3hnnsn+Sw/krImI6UzRx9afsFd+FHz1It4VbKB2Ru3JTeU3DjyLQL6WpmXb+42XZtGf8epmi1qvtsreiM3677aaol7y/nMWBV35aSAH38tcap7PPHKANTY5rP0RZeKLSpvf71nfXPnjRtWssPO5Z393kN9qKq/ubs1zQxTeBoneJ2ZiqKH6z5uVN9IH36LvE2G4yg3SfPfwfcWiyPgZvAAA= +{{- end }} diff --git a/charts/kubezero-cert-manager/templates/prometheus-rules.yaml b/charts/kubezero-cert-manager/templates/prometheus-rules.yaml new file mode 100644 index 00000000..cbf455b0 --- /dev/null +++ b/charts/kubezero-cert-manager/templates/prometheus-rules.yaml @@ -0,0 +1,53 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kubezero-lib.fullname" $) "prometheus-rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "kubezero-lib.labels" . | nindent 4 }} +spec: + groups: + - name: cert-manager + rules: + - alert: CertManagerAbsent + annotations: + description: New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back. + runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent + summary: Cert Manager has dissapeared from Prometheus service discovery. + expr: absent(up{job="cert-manager"}) + for: 10m + labels: + severity: critical + - name: certificates + rules: + - alert: CertManagerCertExpirySoon + annotations: + dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager + description: The domain that this cert covers will be unavailable after {{`{{`}} $value | humanizeDuration {{`}}`}}. Clients using endpoints that this cert protects will start to fail in {{`{{`}} $value | humanizeDuration {{`}}`}}. + runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon + summary: The cert `{{`{{`}} $labels.name {{`}}`}}` is {{`{{`}} $value | humanizeDuration {{`}}`}} from expiry, it should have renewed over a week ago. + expr: "avg by (exported_namespace, namespace, name) (\n certmanager_certificate_expiration_timestamp_seconds - time()\n) < (21 * 24 * 3600) # 21 days in seconds\n" + for: 1h + labels: + severity: warning + - alert: CertManagerCertNotReady + annotations: + dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager + description: This certificate has not been ready to serve traffic for at least 10m. If the cert is being renewed or there is another valid cert, the ingress controller _may_ be able to serve that instead. + runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready + summary: The cert `{{`{{`}} $labels.name {{`}}`}}` is not ready to serve traffic. + expr: "max by (name, exported_namespace, namespace, condition) (\n certmanager_certificate_ready_status{condition!=\"True\"} == 1\n)\n" + for: 10m + labels: + severity: critical + - alert: CertManagerHittingRateLimits + annotations: + dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager + description: Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week. + runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits + summary: Cert manager hitting LetsEncrypt rate limits. + expr: "sum by (host) (\n rate(certmanager_http_acme_client_request_count{status=\"429\"}[5m])\n) > 0\n" + for: 5m + labels: + severity: critical + diff --git a/charts/kubezero-cert-manager/update.sh b/charts/kubezero-cert-manager/update.sh new file mode 100755 index 00000000..783a143c --- /dev/null +++ b/charts/kubezero-cert-manager/update.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -ex + +# Fetch dashboards from Grafana.com and update ZDT CM +../kubezero-metrics/sync_grafana_dashboards.py dashboards.yaml templates/grafana-dashboards.yaml + +# Get kube-mixin for alerts +which jsonnet > /dev/null || { echo "Required jsonnet not found!"; exit 1;} +which jb > /dev/null || { echo "Required jb ( json-bundler ) not found!"; exit 1;} + +[ -r jsonnetfile.json ] || jb init +if [ -r jsonnetfile.lock.json ]; then + jb update +else + jb install github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus@main + jb install gitlab.com/uneeq-oss/cert-manager-mixin@master +fi + +rm -rf rules && mkdir -p rules +jsonnet -J vendor -m rules rules.jsonnet + +../kubezero-metrics/sync_prometheus_rules.py cert-manager-rules.yaml templates diff --git a/charts/kubezero-cert-manager/values.yaml b/charts/kubezero-cert-manager/values.yaml index 3fb9601c..2742fe5f 100644 --- a/charts/kubezero-cert-manager/values.yaml +++ b/charts/kubezero-cert-manager/values.yaml @@ -78,4 +78,8 @@ cert-manager: prometheus: servicemonitor: enabled: false + # cert-manager.podAnnotations -- "iam.amazonaws.com/roleIAM:" role ARN the cert-manager might use via kiam eg."arn:aws:iam::123456789012:role/certManagerRoleArn" + + startupapicheck: + enabled: false