From 8a8c4fbe66b04fdeaefc0ddaf6c397e35a0d9ee9 Mon Sep 17 00:00:00 2001 From: Stefan Reimer Date: Fri, 26 Feb 2021 22:25:43 +0100 Subject: [PATCH] Prometheus-stack version bump --- charts/kubezero-metrics/Chart.yaml | 6 +- .../charts/kube-prometheus-stack/Chart.yaml | 17 +- .../charts/kube-prometheus-stack/README.md | 16 +- .../charts/grafana/Chart.yaml | 9 +- .../charts/grafana/README.md | 75 ++++--- .../charts/grafana/templates/_helpers.tpl | 11 + .../charts/grafana/templates/_pod.tpl | 23 ++ .../configmap-dashboard-provider.yaml | 3 + .../charts/grafana/templates/configmap.yaml | 3 + .../templates/image-renderer-deployment.yaml | 3 + .../charts/grafana/templates/ingress.yaml | 7 +- .../grafana/templates/podsecuritypolicy.yaml | 1 + .../charts/grafana/templates/pvc.yaml | 5 + .../charts/grafana/templates/role.yaml | 2 +- .../charts/grafana/templates/rolebinding.yaml | 2 +- .../charts/grafana/templates/secret.yaml | 2 +- .../grafana/templates/servicemonitor.yaml | 8 +- .../charts/grafana/templates/statefulset.yaml | 7 +- .../tests/test-podsecuritypolicy.yaml | 1 + .../charts/grafana/values.yaml | 49 ++++- .../charts/kube-state-metrics/Chart.yaml | 13 +- .../charts/kube-state-metrics/LICENSE | 202 ++++++++++++++++++ .../charts/kube-state-metrics/OWNERS | 6 + .../charts/kube-state-metrics/README.md | 141 +++++------- .../templates/clusterrolebinding.yaml | 6 +- .../templates/deployment.yaml | 12 +- .../kube-state-metrics/templates/pdb.yaml | 5 +- .../templates/{clusterrole.yaml => role.yaml} | 80 ++++--- .../templates/rolebinding.yaml | 27 +++ .../kube-state-metrics/templates/service.yaml | 6 + .../templates/servicemonitor.yaml | 9 + .../charts/kube-state-metrics/values.yaml | 22 +- .../prometheus-node-exporter/Chart.yaml | 3 +- .../templates/daemonset.yaml | 18 +- .../templates/monitor.yaml | 7 + .../templates/serviceaccount.yaml | 2 + .../prometheus-node-exporter/values.yaml | 17 +- .../crds/crd-alertmanagerconfigs.yaml | 45 ++-- .../crds/crd-alertmanagers.yaml | 2 +- .../crds/crd-podmonitors.yaml | 2 +- .../crds/crd-probes.yaml | 2 +- .../crds/crd-prometheuses.yaml | 11 +- .../crds/crd-prometheusrules.yaml | 2 +- .../crds/crd-servicemonitors.yaml | 2 +- .../crds/crd-thanosrulers.yaml | 2 +- .../templates/alertmanager/alertmanager.yaml | 22 +- .../templates/alertmanager/ingress.yaml | 11 +- .../alertmanager/ingressperreplica.yaml | 6 +- .../kube-state-metrics/serviceMonitor.yaml | 4 + .../grafana/configmaps-datasources.yaml | 5 +- .../dashboards-1.14/cluster-total.yaml | 2 +- .../grafana/dashboards-1.14/etcd.yaml | 2 +- .../grafana/dashboards-1.14/kubelet.yaml | 4 +- .../dashboards-1.14/namespace-by-pod.yaml | 2 +- .../namespace-by-workload.yaml | 2 +- .../node-cluster-rsrc-use.yaml | 22 +- .../dashboards-1.14/node-rsrc-use.yaml | 22 +- .../grafana/dashboards-1.14/nodes.yaml | 20 +- .../persistentvolumesusage.yaml | 4 +- .../grafana/dashboards-1.14/pod-total.yaml | 2 +- .../prometheus-remote-write.yaml | 20 +- .../grafana/dashboards-1.14/prometheus.yaml | 6 +- .../dashboards-1.14/workload-total.yaml | 2 +- .../templates/grafana/dashboards/etcd.yaml | 2 +- .../job-patch/clusterrole.yaml | 2 +- .../job-patch/clusterrolebinding.yaml | 2 +- .../job-patch/job-createSecret.yaml | 2 +- .../job-patch/job-patchWebhook.yaml | 2 +- .../admission-webhooks/job-patch/psp.yaml | 2 +- .../admission-webhooks/job-patch/role.yaml | 2 +- .../job-patch/rolebinding.yaml | 2 +- .../job-patch/serviceaccount.yaml | 2 +- .../mutatingWebhookConfiguration.yaml | 8 + .../validatingWebhookConfiguration.yaml | 8 + 
.../prometheus-operator/certmanager.yaml | 57 +++++ .../prometheus-operator/deployment.yaml | 37 ++-- .../prometheus-operator/servicemonitor.yaml | 2 +- .../templates/prometheus/ingress.yaml | 9 +- .../prometheus/ingressThanosSidecar.yaml | 13 +- .../prometheus/ingressperreplica.yaml | 6 +- .../templates/prometheus/prometheus.yaml | 19 +- .../templates/prometheus/psp.yaml | 7 + .../templates/prometheus/rules-1.14/etcd.yaml | 30 +-- .../prometheus/rules-1.14/k8s.rules.yaml | 6 +- .../kube-apiserver-availability.rules.yaml | 98 ++++----- .../rules-1.14/kube-apiserver-slos.yaml | 2 +- .../rules-1.14/kube-apiserver.rules.yaml | 7 +- .../rules-1.14/kube-scheduler.rules.yaml | 2 +- .../prometheus/rules-1.14/kubelet.rules.yaml | 2 +- .../rules-1.14/kubernetes-apps.yaml | 6 +- .../rules-1.14/kubernetes-resources.yaml | 2 +- .../rules-1.14/kubernetes-storage.yaml | 2 +- .../kubernetes-system-apiserver.yaml | 20 +- .../kubernetes-system-controller-manager.yaml | 2 +- .../rules-1.14/kubernetes-system-kubelet.yaml | 2 +- .../kubernetes-system-scheduler.yaml | 2 +- .../rules-1.14/kubernetes-system.yaml | 2 +- .../rules-1.14/node-exporter.rules.yaml | 2 +- .../prometheus/rules-1.14/node-exporter.yaml | 18 +- .../prometheus/rules-1.14/node.rules.yaml | 4 +- .../templates/prometheus/rules/etcd.yaml | 30 +-- .../templates/prometheus/service.yaml | 8 + .../prometheus/serviceThanosSIdecar.yaml | 27 +++ .../templates/prometheus/servicemonitors.yaml | 4 + .../charts/kube-prometheus-stack/values.yaml | 156 ++++++++++++-- charts/kubezero-metrics/update.sh | 4 +- 106 files changed, 1181 insertions(+), 493 deletions(-) create mode 100644 charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/LICENSE create mode 100644 charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/OWNERS rename charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/{clusterrole.yaml => role.yaml} (61%) create mode 100644 charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml create mode 100644 charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml create mode 100644 charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSIdecar.yaml diff --git a/charts/kubezero-metrics/Chart.yaml b/charts/kubezero-metrics/Chart.yaml index 9b2dd2e..dee19cf 100644 --- a/charts/kubezero-metrics/Chart.yaml +++ b/charts/kubezero-metrics/Chart.yaml @@ -16,11 +16,11 @@ dependencies: version: ">= 0.1.3" repository: https://zero-down-time.github.io/kubezero/ - name: kube-prometheus-stack - version: 12.8.0 + version: 13.13.0 # Switch back to upstream once all alerts are fixed eg. 
etcd gpcr # repository: https://prometheus-community.github.io/helm-charts - name: prometheus-adapter - version: 2.10.1 + version: 2.12.1 repository: https://prometheus-community.github.io/helm-charts condition: prometheus-adapter.enabled -kubeVersion: ">= 1.16.0" +kubeVersion: ">= 1.18.0" diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml index a410ff2..a3e49b1 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/Chart.yaml @@ -6,24 +6,21 @@ annotations: url: https://github.com/prometheus-operator/kube-prometheus artifacthub.io/operator: "true" apiVersion: v2 -appVersion: 0.44.0 +appVersion: 0.45.0 dependencies: - condition: kubeStateMetrics.enabled name: kube-state-metrics - repository: https://charts.helm.sh/stable - version: 2.9.* + repository: https://kubernetes.github.io/kube-state-metrics + version: 2.13.* - condition: nodeExporter.enabled name: prometheus-node-exporter repository: https://prometheus-community.github.io/helm-charts - version: 1.12.* + version: 1.14.* - condition: grafana.enabled name: grafana repository: https://grafana.github.io/helm-charts - version: 5.8.* -description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, - and Prometheus rules combined with documentation and scripts to provide easy to - operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus - Operator. + version: 6.4.* +description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator. 
home: https://github.com/prometheus-operator/kube-prometheus icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png keywords: @@ -47,4 +44,4 @@ sources: - https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-operator/kube-prometheus type: application -version: 12.8.0 +version: 13.13.0 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/README.md b/charts/kubezero-metrics/charts/kube-prometheus-stack/README.md index 37794b1..68c1f82 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/README.md +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/README.md @@ -15,7 +15,6 @@ _Note: This chart was formerly named `prometheus-operator` chart, now renamed to ```console helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo add stable https://charts.helm.sh/stable helm repo update ``` @@ -36,7 +35,7 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen By default this chart installs additional, dependent charts: -- [stable/kube-state-metrics](https://github.com/helm/charts/tree/master/stable/kube-state-metrics) +- [kubernetes/kube-state-metrics](https://github.com/kubernetes/kube-state-metrics/tree/master/charts/kube-state-metrics) - [prometheus-community/prometheus-node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter) - [grafana/grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana) @@ -84,6 +83,15 @@ _See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documen A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions. +### From 12.x to 13.x + +Version 13 upgrades prometheus-operator from 0.44.x to 0.45.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +``` + ### From 11.x to 12.x The chart was migrated to support only helm v3 and later. @@ -181,6 +189,8 @@ A validating and mutating webhook configuration requires the endpoint to which t It should be possible to use [jetstack/cert-manager](https://github.com/jetstack/cert-manager) if a more complete solution is required, but it has not been tested. +You can enable automatic self-signed TLS certificate provisioning via cert-manager by setting the `prometheusOperator.admissionWebhooks.certManager.enabled` value to true. + ### Limitations Because the operator can only run as a single pod, there is potential for this component failure to cause rule deployment failure. Because this risk is outweighed by the benefit of having validation, the feature is enabled by default. @@ -311,7 +321,7 @@ You can check out the tickets for this change [here](https://github.com/promethe The chart has added 3 [dependencies](#dependencies).
- Node-Exporter, Kube-State-Metrics: These components are loaded as dependencies into the chart, and are relatively simple components -- Grafana: The Grafana chart is more feature-rich than this chart - it contains a sidecar that is able to load data sources and dashboards from configmaps deployed into the same cluster. For more information check out the [documentation for the chart](https://github.com/helm/charts/tree/master/stable/grafana) +- Grafana: The Grafana chart is more feature-rich than this chart - it contains a sidecar that is able to load data sources and dashboards from configmaps deployed into the same cluster. For more information check out the [documentation for the chart](https://github.com/grafana/helm-charts/blob/main/charts/grafana/README.md) #### Kubelet Service diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml index 2f6e076..bd2ea66 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/Chart.yaml @@ -1,5 +1,5 @@ -apiVersion: v1 -appVersion: 7.2.1 +apiVersion: v2 +appVersion: 7.4.2 description: The leading tool for querying and visualizing time series and metrics. home: https://grafana.net icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png @@ -13,7 +13,10 @@ maintainers: name: maorfr - email: miroslav.hadzhiev@gmail.com name: Xtigyro +- email: mail@torstenwalter.de + name: torstenwalter name: grafana sources: - https://github.com/grafana/grafana -version: 5.8.16 +type: application +version: 6.4.4 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md index 2dc1822..815f6fa 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/README.md @@ -16,7 +16,7 @@ _See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation To install the chart with the release name `my-release`: ```console -helm install --name my-release grafana/grafana +helm install my-release grafana/grafana ``` ## Uninstalling the Chart @@ -42,6 +42,10 @@ This version requires Helm >= 2.12.0. You have to add --force to your helm upgrade command as the labels of the chart have changed. +### To 6.0.0 + +This version requires Helm >= 3.1.0. 
+ ## Configuration | Parameter | Description | Default | @@ -55,7 +59,7 @@ You have to add --force to your helm upgrade command as the labels of the chart | `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` | | `priorityClassName` | Name of Priority Class to assign pods | `nil` | | `image.repository` | Image repository | `grafana/grafana` | -| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.0.3` | +| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.4.2` | | `image.sha` | Image sha (optional) | `17cbd08b9515fda889ca959e9d72ee6f3327c8f1844a3336dfd952134f38e2fe` | | `image.pullPolicy` | Image pull policy | `IfNotPresent` | | `image.pullSecrets` | Image pull secrets | `{}` | @@ -96,6 +100,8 @@ You have to add --force to your helm upgrade command as the labels of the chart | `persistence.annotations` | PersistentVolumeClaim annotations | `{}` | | `persistence.finalizers` | PersistentVolumeClaim finalizers | `[ "kubernetes.io/pvc-protection" ]` | | `persistence.subPath` | Mount a sub dir of the persistent volume | `nil` | +| `persistence.inMemory.enabled` | If persistence is not enabled, whether to mount the local storage in-memory to improve performance | `false` | +| `persistence.inMemory.sizeLimit` | SizeLimit for the in-memory local storage | `nil` | | `initChownData.enabled` | If false, don't reset data ownership at startup | true | | `initChownData.image.repository` | init-chown-data container image repository | `busybox` | | `initChownData.image.tag` | init-chown-data container image tag | `1.31.1` | @@ -126,8 +132,8 @@ You have to add --force to your helm upgrade command as the labels of the chart | `podAnnotations` | Pod annotations | `{}` | | `podLabels` | Pod labels | `{}` | | `podPortName` | Name of the grafana port on the pod | `grafana` | -| `sidecar.image.repository` | Sidecar image repository | `kiwigrid/k8s-sidecar` | -| `sidecar.image.tag` | Sidecar image tag | `1.1.0` | +| `sidecar.image.repository` | Sidecar image repository | `quay.io/kiwigrid/k8s-sidecar` | +| `sidecar.image.tag` | Sidecar image tag | `1.10.6` | | `sidecar.image.sha` | Sidecar image sha (optional) | `""` | | `sidecar.imagePullPolicy` | Sidecar image pull policy | `IfNotPresent` | | `sidecar.resources` | Sidecar resources | `{}` | @@ -144,14 +150,16 @@ You have to add --force to your helm upgrade command as the labels of the chart | `sidecar.dashboards.watchMethod` | Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. | `WATCH` | | `sidecar.skipTlsVerify` | Set to true to skip tls verification for kube api calls | `nil` | | `sidecar.dashboards.label` | Label that config maps with dashboards should have to be added | `grafana_dashboard` | +| `sidecar.dashboards.labelValue` | Label value that config maps with dashboards should have to be added | `nil` | | `sidecar.dashboards.folder` | Folder in the pod that should hold the collected dashboards (unless `sidecar.dashboards.defaultFolderName` is set). This path will be mounted. 
| `/tmp/dashboards` | | `sidecar.dashboards.folderAnnotation` | The annotation the sidecar will look for in configmaps to override the destination folder for files | `nil` | | `sidecar.dashboards.defaultFolderName` | The default folder name, it will create a subfolder under the `sidecar.dashboards.folder` and put dashboards in there instead | `nil` | | `sidecar.dashboards.searchNamespace` | If specified, the sidecar will search for dashboard config-maps inside this namespace. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces | `nil` | | `sidecar.datasources.enabled` | Enables the cluster wide search for datasources and adds/updates/deletes them in grafana |`false` | | `sidecar.datasources.label` | Label that config maps with datasources should have to be added | `grafana_datasource` | +| `sidecar.datasources.labelValue` | Label value that config maps with datasources should have to be added | `nil` | | `sidecar.datasources.searchNamespace` | If specified, the sidecar will search for datasources config-maps inside this namespace. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces | `nil` | -| `sidecar.notifiers.enabled` | Enables the cluster wide search for notifiers and adds/updates/deletes them in grafana |`false` | +| `sidecar.notifiers.enabled` | Enables the cluster wide search for notifiers and adds/updates/deletes them in grafana | `false` | | `sidecar.notifiers.label` | Label that config maps with notifiers should have to be added | `grafana_notifier` | | `sidecar.notifiers.searchNamespace` | If specified, the sidecar will search for notifiers config-maps (or secrets) inside this namespace. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces | `nil` | | `smtp.existingSecret` | The name of an existing secret containing the SMTP credentials. | `""` | @@ -173,9 +181,9 @@ You have to add --force to your helm upgrade command as the labels of the chart | `rbac.extraClusterRoleRules` | Additional rules to add to the ClusterRole | [] | | `command` | Define command to be executed by grafana container at startup | `nil` | | `testFramework.enabled` | Whether to create test-related resources | `true` | -| `testFramework.image` | `test-framework` image repository. | `bats/bats` | -| `testFramework.tag` | `test-framework` image tag. | `v1.1.0` | -| `testFramework.imagePullPolicy` | `test-framework` image pull policy. | `IfNotPresent` | +| `testFramework.image` | `test-framework` image repository. | `bats/bats` | +| `testFramework.tag` | `test-framework` image tag. | `v1.1.0` | +| `testFramework.imagePullPolicy` | `test-framework` image pull policy. 
| `IfNotPresent` | | `testFramework.securityContext` | `test-framework` securityContext | `{}` | | `downloadDashboards.env` | Environment variables to be passed to the `download-dashboards` container | `{}` | | `downloadDashboards.resources` | Resources of `download-dashboards` container | `{}` | @@ -188,6 +196,8 @@ You have to add --force to your helm upgrade command as the labels of the chart | `serviceMonitor.namespace` | Namespace this servicemonitor is installed in | | | `serviceMonitor.interval` | How frequently Prometheus should scrape | `1m` | | `serviceMonitor.path` | Path to scrape | `/metrics` | +| `serviceMonitor.scheme` | Scheme to use for metrics scraping | `http` | +| `serviceMonitor.tlsConfig` | TLS configuration block for the endpoint | `{}` | | `serviceMonitor.labels` | Labels for the servicemonitor passed to Prometheus Operator | `{}` | | `serviceMonitor.scrapeTimeout` | Timeout after which the scrape is ended | `30s` | | `serviceMonitor.relabelings` | MetricRelabelConfigs to apply to samples before ingestion. | `[]` | @@ -198,6 +208,7 @@ You have to add --force to your helm upgrade command as the labels of the chart | `imageRenderer.image.sha` | image-renderer Image sha (optional) | `""` | | `imageRenderer.image.pullPolicy` | image-renderer ImagePullPolicy | `Always` | | `imageRenderer.env` | extra env-vars for image-renderer | `{}` | +| `imageRenderer.serviceAccountName` | image-renderer deployment serviceAccountName | `""` | | `imageRenderer.securityContext` | image-renderer deployment securityContext | `{}` | | `imageRenderer.hostAliases` | image-renderer deployment Host Aliases | `[]` | | `imageRenderer.priorityClassName` | image-renderer deployment priority class | `''` | @@ -311,35 +322,18 @@ If the parameter `sidecar.datasources.enabled` is set, an init container is depl pod. This container lists all secrets (or configmaps, though not recommended) in the cluster and filters out the ones with a label as defined in `sidecar.datasources.label`. The files defined in those secrets are written to a folder and accessed by grafana on startup. Using these yaml files, -the data sources in grafana can be imported. The secrets must be created before `helm install` so -that the datasources init container can list the secrets. +the data sources in grafana can be imported. Secrets are recommended over configmaps for this use case because datasources usually contain private data like usernames and passwords. Secrets are the more appropriate cluster resource to manage those. -Example datasource config adapted from [Grafana](http://docs.grafana.org/administration/provisioning/#example-datasource-config-file): +Example values to add a datasource adapted from [Grafana](http://docs.grafana.org/administration/provisioning/#example-datasource-config-file): ```yaml -apiVersion: v1 -kind: Secret -metadata: - name: sample-grafana-datasource - labels: - grafana_datasource: "1" -type: Opaque -stringData: - datasource.yaml: |- - # config file version - apiVersion: 1 - - # list of datasources that should be deleted from the database - deleteDatasources: - - name: Graphite - orgId: 1 - - # list of datasources to insert/update depending - # whats available in the database - datasources: +datasources: + datasources.yaml: + apiVersion: 1 + datasources: # name of the datasource. Required - name: Graphite # datasource type. Required @@ -379,7 +373,6 @@ stringData: version: 1 # allow users to edit datasources from the UI.
editable: false - ``` ## Sidecar for notifiers @@ -485,6 +478,24 @@ Include in the `extraSecretMounts` configuration flag: readOnly: true ``` +### extraSecretMounts using a Container Storage Interface (CSI) provider + +This example uses a CSI driver, e.g. retrieving secrets using the [Azure Key Vault Provider](https://github.com/Azure/secrets-store-csi-driver-provider-azure) + +```yaml +- extraSecretMounts: + - name: secrets-store-inline + mountPath: /run/secrets + readOnly: true + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: "my-provider" + nodePublishSecretRef: + name: akv-creds +``` + ## Image Renderer Plug-In This chart supports enabling [remote image rendering](https://github.com/grafana/grafana-image-renderer/blob/master/docs/remote_rendering_using_docker.md) diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_helpers.tpl b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_helpers.tpl index 4dd8834..9ce170c 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_helpers.tpl +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_helpers.tpl @@ -100,3 +100,14 @@ Selector labels ImageRenderer app.kubernetes.io/name: {{ include "grafana.name" . }}-image-renderer app.kubernetes.io/instance: {{ .Release.Name }} {{- end -}} + +{{/* +Return the appropriate apiVersion for rbac. +*/}} +{{- define "rbac.apiVersion" -}} +{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" }} +{{- print "rbac.authorization.k8s.io/v1" -}} +{{- else -}} +{{- print "rbac.authorization.k8s.io/v1beta1" -}} +{{- end -}} +{{- end -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl index a9e471c..6b0ef5d 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl @@ -84,6 +84,10 @@ initContainers: value: LIST - name: LABEL value: "{{ .Values.sidecar.datasources.label }}" + {{- if .Values.sidecar.datasources.labelValue }} + - name: LABEL_VALUE + value: {{ quote .Values.sidecar.datasources.labelValue }} + {{- end }} - name: FOLDER value: "/etc/grafana/provisioning/datasources" - name: RESOURCE @@ -164,6 +168,10 @@ containers: value: {{ .Values.sidecar.dashboards.watchMethod }} - name: LABEL value: "{{ .Values.sidecar.dashboards.label }}" + {{- if .Values.sidecar.dashboards.labelValue }} + - name: LABEL_VALUE + value: {{ quote .Values.sidecar.dashboards.labelValue }} + {{- end }} - name: FOLDER value: "{{ .Values.sidecar.dashboards.folder }}{{- with .Values.sidecar.dashboards.defaultFolderName }}/{{ . }}{{- end }}" - name: RESOURCE @@ -203,6 +211,10 @@ containers: - {{ .
}} {{- end }} {{- end}} +{{- if .Values.containerSecurityContext }} + securityContext: +{{- toYaml .Values.containerSecurityContext | nindent 6 }} +{{- end }} volumeMounts: - name: config mountPath: "/etc/grafana/grafana.ini" @@ -419,8 +431,16 @@ volumes: # nothing {{- else }} - name: storage +{{- if .Values.persistence.inMemory.enabled }} + emptyDir: + medium: Memory +{{- if .Values.persistence.inMemory.sizeLimit }} + sizeLimit: {{ .Values.persistence.inMemory.sizeLimit }} +{{- end -}} +{{- else }} emptyDir: {} {{- end -}} +{{- end -}} {{- if .Values.sidecar.dashboards.enabled }} - name: sc-dashboard-volume emptyDir: {} @@ -447,6 +467,9 @@ volumes: {{- else if .projected }} - name: {{ .name }} projected: {{- toYaml .projected | nindent 6 }} +{{- else if .csi }} + - name: {{ .name }} + csi: {{- toYaml .csi | nindent 6 }} {{- end }} {{- end }} {{- range .Values.extraVolumeMounts }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/configmap-dashboard-provider.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/configmap-dashboard-provider.yaml index 8bb0567..65d7385 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/configmap-dashboard-provider.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/configmap-dashboard-provider.yaml @@ -16,10 +16,13 @@ data: providers: - name: '{{ .Values.sidecar.dashboards.provider.name }}' orgId: {{ .Values.sidecar.dashboards.provider.orgid }} + {{- if not .Values.sidecar.dashboards.provider.foldersFromFilesStructure }} folder: '{{ .Values.sidecar.dashboards.provider.folder }}' + {{- end}} type: {{ .Values.sidecar.dashboards.provider.type }} disableDeletion: {{ .Values.sidecar.dashboards.provider.disableDelete }} allowUiUpdates: {{ .Values.sidecar.dashboards.provider.allowUiUpdates }} + updateIntervalSeconds: {{ .Values.sidecar.dashboards.provider.updateIntervalSeconds | default 30 }} options: foldersFromFilesStructure: {{ .Values.sidecar.dashboards.provider.foldersFromFilesStructure }} path: {{ .Values.sidecar.dashboards.folder }}{{- with .Values.sidecar.dashboards.defaultFolderName }}/{{ . }}{{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/configmap.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/configmap.yaml index a9fdc3a..0d2c3e2 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/configmap.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/configmap.yaml @@ -59,6 +59,9 @@ data: --max-time 60 \ {{- if not $value.b64content }} -H "Accept: application/json" \ + {{- if $value.token }} + -H "Authorization: token {{ $value.token }}" \ + {{- end }} -H "Content-Type: application/json;charset=UTF-8" \ {{ end }} {{- if $value.url -}}"{{ $value.url }}"{{- else -}}"https://grafana.com/api/dashboards/{{ $value.gnetId }}/revisions/{{- if $value.revision -}}{{ $value.revision }}{{- else -}}1{{- end -}}/download"{{- end -}}{{ if $value.datasource }} | sed '/-- .* --/! 
s/"datasource":.*,/"datasource": "{{ $value.datasource }}",/g'{{ end }}{{- if $value.b64content -}} | base64 -d {{- end -}} \ diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-deployment.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-deployment.yaml index 1f60ffb..2ab9f5e 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-deployment.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-deployment.yaml @@ -40,6 +40,9 @@ spec: {{- if .Values.imageRenderer.schedulerName }} schedulerName: "{{ .Values.imageRenderer.schedulerName }}" {{- end }} + {{- if .Values.imageRenderer.serviceAccountName }} + serviceAccountName: "{{ .Values.imageRenderer.serviceAccountName }}" + {{- end }} {{- if .Values.imageRenderer.securityContext }} securityContext: {{ toYaml .Values.imageRenderer.securityContext | indent 2 }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/ingress.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/ingress.yaml index 8d35662..710b82d 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/ingress.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/ingress.yaml @@ -24,14 +24,17 @@ metadata: {{- end }} {{- end }} spec: + {{- if .Values.ingress.ingressClassName }} + ingressClassName: {{ .Values.ingress.ingressClassName }} + {{- end -}} {{- if .Values.ingress.tls }} tls: -{{ toYaml .Values.ingress.tls | indent 4 }} +{{ tpl (toYaml .Values.ingress.tls) $ | indent 4 }} {{- end }} rules: {{- if .Values.ingress.hosts }} {{- range .Values.ingress.hosts }} - - host: {{ . }} + - host: {{ tpl . $}} http: paths: {{ if $extraPaths }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml index 9d50471..88bf64c 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/podsecuritypolicy.yaml @@ -34,6 +34,7 @@ spec: - 'configMap' - 'emptyDir' - 'projected' + - 'csi' - 'secret' - 'downwardAPI' - 'persistentVolumeClaim' diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/pvc.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/pvc.yaml index 4727d0a..8d93f5c 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/pvc.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/pvc.yaml @@ -25,4 +25,9 @@ spec: {{- if .Values.persistence.storageClassName }} storageClassName: {{ .Values.persistence.storageClassName }} {{- end -}} + {{- with .Values.persistence.selectorLabels }} + selector: + matchLabels: +{{ toYaml . 
| indent 6 }} + {{- end }} {{- end -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/role.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/role.yaml index db85355..54c3fb0 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/role.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/role.yaml @@ -1,5 +1,5 @@ {{- if and .Values.rbac.create (not .Values.rbac.useExistingRole) -}} -apiVersion: rbac.authorization.k8s.io/v1beta1 +apiVersion: {{ template "rbac.apiVersion" . }} kind: Role metadata: name: {{ template "grafana.fullname" . }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/rolebinding.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/rolebinding.yaml index 3738e58..34f1ad6 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/rolebinding.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/rolebinding.yaml @@ -1,5 +1,5 @@ {{- if .Values.rbac.create -}} -apiVersion: rbac.authorization.k8s.io/v1beta1 +apiVersion: {{ template "rbac.apiVersion" . }} kind: RoleBinding metadata: name: {{ template "grafana.fullname" . }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/secret.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/secret.yaml index 9d2f072..4fdd817 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/secret.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/secret.yaml @@ -17,6 +17,6 @@ data: {{- end }} {{- end }} {{- if not .Values.ldap.existingSecret }} - ldap-toml: {{ .Values.ldap.config | b64enc | quote }} + ldap-toml: {{ tpl .Values.ldap.config $ | b64enc | quote }} {{- end }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/servicemonitor.yaml index 988956b..2328852 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/servicemonitor.yaml @@ -21,6 +21,11 @@ spec: honorLabels: true port: {{ .Values.service.portName }} path: {{ .Values.serviceMonitor.path }} + scheme: {{ .Values.serviceMonitor.scheme }} + {{- if .Values.serviceMonitor.tlsConfig }} + tlsConfig: + {{- toYaml .Values.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} {{- if .Values.serviceMonitor.relabelings }} relabelings: {{- toYaml .Values.serviceMonitor.relabelings | nindent 4 }} @@ -28,8 +33,7 @@ spec: jobLabel: "{{ .Release.Name }}" selector: matchLabels: - app: {{ template "grafana.name" . }} - release: "{{ .Release.Name }}" + {{- include "grafana.selectorLabels" . 
| nindent 8 }} namespaceSelector: matchNames: - {{ .Release.Namespace }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/statefulset.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/statefulset.yaml index accfa56..b2b4616 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/statefulset.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/statefulset.yaml @@ -43,5 +43,10 @@ spec: storageClassName: {{ .Values.persistence.storageClassName }} resources: requests: - storage: {{ .Values.persistence.size }} + storage: {{ .Values.persistence.size }} + {{- with .Values.persistence.selectorLabels }} + selector: + matchLabels: +{{ toYaml . | indent 10 }} + {{- end }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/tests/test-podsecuritypolicy.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/tests/test-podsecuritypolicy.yaml index eb5cbbc..1acd651 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/tests/test-podsecuritypolicy.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/templates/tests/test-podsecuritypolicy.yaml @@ -25,5 +25,6 @@ spec: - downwardAPI - emptyDir - projected + - csi - secret {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml index 1f22b93..c461687 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/grafana/values.yaml @@ -53,7 +53,7 @@ livenessProbe: image: repository: grafana/grafana - tag: 7.2.1 + tag: 7.4.2 sha: "" pullPolicy: IfNotPresent @@ -76,6 +76,8 @@ securityContext: runAsGroup: 472 fsGroup: 472 +containerSecurityContext: + {} extraConfigmapMounts: [] # - name: certs-configmap @@ -136,6 +138,8 @@ serviceMonitor: # namespace: monitoring (defaults to use the namespace this chart is deployed to) labels: {} interval: 1m + scheme: http + tlsConfig: {} scrapeTimeout: 30s relabelings: [] @@ -153,6 +157,9 @@ hostAliases: [] ingress: enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx # Values can be templated annotations: {} # kubernetes.io/ingress.class: nginx @@ -235,9 +242,21 @@ persistence: # annotations: {} finalizers: - kubernetes.io/pvc-protection + # selectorLabels: {} # subPath: "" # existingClaim: + ## If persistence is not enabled, this allows to mount the + ## local storage in-memory to improve performance + ## + inMemory: + enabled: false + ## The maximum usage on memory medium EmptyDir would be + ## the minimum value between the SizeLimit specified + ## here and the sum of memory limits of all containers in a pod + ## + # sizeLimit: 300Mi + initChownData: ## If false, data ownership will not be reset at startup ## This allows the prometheus-server to be run with an arbitrary user @@ -348,6 +367,18 @@ extraSecretMounts: [] # audience: sts.amazonaws.com # expirationSeconds: 86400 # path: token + # + # for CSI e.g. 
Azure Key Vault use the following + # - name: secrets-store-inline + # mountPath: /run/secrets + # readOnly: true + # csi: + # driver: secrets-store.csi.k8s.io + # readOnly: true + # volumeAttributes: + # secretProviderClass: "akv-grafana-spc" + # nodePublishSecretRef: # Only required when using service principal mode + # name: grafana-akv-creds # Only required when using service principal mode ## Additional grafana server volume mounts # Defines additional volume mounts. @@ -439,8 +470,10 @@ dashboards: {} # datasource: Prometheus # local-dashboard: # url: https://example.com/repository/test.json + # token: '' # local-dashboard-base64: # url: https://example.com/repository/test-b64.json + # token: '' # b64content: true ## Reference to external ConfigMap per provider. Use provider name as key and ConfigMap name as value. @@ -530,8 +563,8 @@ smtp: ## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards sidecar: image: - repository: kiwigrid/k8s-sidecar - tag: 1.1.0 + repository: quay.io/kiwigrid/k8s-sidecar + tag: 1.10.6 sha: "" imagePullPolicy: IfNotPresent resources: {} @@ -549,6 +582,8 @@ sidecar: SCProvider: true # label that the configmaps with dashboards are marked with label: grafana_dashboard + # value of label that the configmaps with dashboards are set to + labelValue: null # folder in the pod that should hold the collected dashboards (unless `defaultFolderName` is set) folder: /tmp/dashboards # The default folder name, it will create a subfolder under the `folder` and put dashboards in there instead @@ -580,6 +615,8 @@ sidecar: enabled: false # label that the configmaps with datasources are marked with label: grafana_datasource + # value of label that the configmaps with datasources are set to + labelValue: null # If specified, the sidecar will search for datasource config-maps inside this namespace. # Otherwise the namespace in which the sidecar is running will be used. 
# It's also possible to specify ALL to search in all namespaces @@ -616,9 +653,12 @@ imageRenderer: # image-renderer ImagePullPolicy pullPolicy: Always # extra environment variables - env: {} + env: + HTTP_HOST: "0.0.0.0" # RENDERING_ARGS: --disable-gpu,--window-size=1280x758 # RENDERING_MODE: clustered + # image-renderer deployment serviceAccount + serviceAccountName: "" # image-renderer deployment securityContext securityContext: {} # image-renderer deployment Host Aliases @@ -630,6 +670,7 @@ imageRenderer: portName: 'http' # image-renderer service port used by both service and deployment port: 8081 + targetPort: 8081 # name of the image-renderer port on the pod podPortName: http # number of image-renderer replica sets to keep diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml index 663499a..45a1a71 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml @@ -1,15 +1,18 @@ apiVersion: v1 -appVersion: 1.9.7 -deprecated: true -description: DEPRECATED - Install kube-state-metrics to generate and expose cluster-level - metrics +appVersion: 1.9.8 +description: Install kube-state-metrics to generate and expose cluster-level metrics home: https://github.com/kubernetes/kube-state-metrics/ keywords: - metric - monitoring - prometheus - kubernetes +maintainers: +- email: tariq.ibrahim@mulesoft.com + name: tariq1890 +- email: manuel@rueg.eu + name: mrueg name: kube-state-metrics sources: - https://github.com/kubernetes/kube-state-metrics/ -version: 2.9.4 +version: 2.13.0 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/LICENSE b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/LICENSE new file mode 100644 index 0000000..393b7a3 --- /dev/null +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright The Helm Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/OWNERS b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/OWNERS new file mode 100644 index 0000000..206b4fe --- /dev/null +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/OWNERS @@ -0,0 +1,6 @@ +approvers: +- tariq1890 +- mrueg +reviewers: +- tariq1890 +- mrueg diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/README.md b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/README.md index 4e1178e..e93a3d2 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/README.md +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/README.md @@ -1,91 +1,66 @@ -# ⚠️ Repo Archive Notice - -As of Nov 13, 2020, charts in this repo will no longer be updated. 
-For more information, see the Helm Charts [Deprecation and Archive Notice](https://github.com/helm/charts#%EF%B8%8F-deprecation-and-archive-notice), and [Update](https://helm.sh/blog/charts-repo-deprecation/). - # kube-state-metrics Helm Chart -* Installs the [kube-state-metrics agent](https://github.com/kubernetes/kube-state-metrics). +Installs the [kube-state-metrics agent](https://github.com/kubernetes/kube-state-metrics). -## DEPRECATION NOTICE +## Get Repo Info -This chart is deprecated and no longer supported. - -## Installing the Chart - -To install the chart with the release name `my-release`: - -```bash -$ helm install stable/kube-state-metrics +```console +helm repo add kube-state-metrics https://kubernetes.github.io/kube-state-metrics +helm repo update ``` +_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +```console +# Helm 3 +$ helm install [RELEASE_NAME] kube-state-metrics/kube-state-metrics [flags] + +# Helm 2 +$ helm install --name [RELEASE_NAME] kube-state-metrics/kube-state-metrics [flags] +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +# Helm 3 +$ helm uninstall [RELEASE_NAME] + +# Helm 2 +# helm delete --purge [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +# Helm 3 or 2 +$ helm upgrade [RELEASE_NAME] kube-state-metrics/kube-state-metrics [flags] +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### From stable/kube-state-metrics + +You can upgrade in-place: + +1. [get repo info](#get-repo-info) +1. [upgrade](#upgrading-chart) your existing release name using the new chart repo + ## Configuration -| Parameter | Description | Default | -|:---------------------------------------------|:--------------------------------------------------------------------------------------|:-------------------------------------------| -| `image.repository` | The image repository to pull from | `quay.io/coreos/kube-state-metrics` | -| `image.tag` | The image tag to pull from | `v1.9.7` | -| `image.pullPolicy` | Image pull policy | `IfNotPresent` | -| `imagePullSecrets` | List of container registry secrets | `[]` | -| `replicas` | Number of replicas | `1` | -| `autosharding.enabled` | Set to `true` to automatically shard data across `replicas` pods. EXPERIMENTAL | `false` | -| `service.port` | The port of the container | `8080` | -| `service.annotations` | Annotations to be added to the service | `{}` | -| `customLabels` | Custom labels to apply to service, deployment and pods | `{}` | -| `hostNetwork` | Whether or not to use the host network | `false` | -| `prometheusScrape` | Whether or not enable prom scrape | `true` | -| `rbac.create` | If true, create & use RBAC resources | `true` | -| `serviceAccount.create` | If true, create & use serviceAccount | `true` | -| `serviceAccount.name` | If not set & create is true, use template fullname | | -| `serviceAccount.imagePullSecrets` | Specify image pull secrets field | `[]` | -| `serviceAccount.annotations` | Annotations to be added to the serviceAccount | `{}` | -| `podSecurityPolicy.enabled` | If true, create & use PodSecurityPolicy resources. 
Note that related RBACs are created only if `rbac.enabled` is `true`. | `false` | -| `podSecurityPolicy.annotations` | Specify pod annotations in the pod security policy | `{}` | -| `podSecurityPolicy.additionalVolumes` | Specify allowed volumes in the pod security policy (`secret` is always allowed) | `[]` | -| `securityContext.enabled` | Enable security context | `true` | -| `securityContext.fsGroup` | Group ID for the filesystem | `65534` | -| `securityContext.runAsGroup` | Group ID for the container | `65534` | -| `securityContext.runAsUser` | User ID for the container | `65534` | -| `priorityClassName` | Name of Priority Class to assign pods | `nil` | -| `nodeSelector` | Node labels for pod assignment | `{}` | -| `affinity` | Affinity settings for pod assignment | `{}` | -| `tolerations` | Tolerations for pod assignment | `[]` | -| `podAnnotations` | Annotations to be added to the pod | `{}` | -| `podDisruptionBudget` | Optional PodDisruptionBudget | `{}` | -| `resources` | kube-state-metrics resource requests and limits | `{}` | -| `collectors.certificatesigningrequests` | Enable the certificatesigningrequests collector. | `true` | -| `collectors.configmaps` | Enable the configmaps collector. | `true` | -| `collectors.cronjobs` | Enable the cronjobs collector. | `true` | -| `collectors.daemonsets` | Enable the daemonsets collector. | `true` | -| `collectors.deployments` | Enable the deployments collector. | `true` | -| `collectors.endpoints` | Enable the endpoints collector. | `true` | -| `collectors.horizontalpodautoscalers` | Enable the horizontalpodautoscalers collector. | `true` | -| `collectors.ingresses` | Enable the ingresses collector. | `true` | -| `collectors.jobs` | Enable the jobs collector. | `true` | -| `collectors.limitranges` | Enable the limitranges collector. | `true` | -| `collectors.mutatingwebhookconfigurations` | Enable the mutatingwebhookconfigurations collector. | `true` | -| `collectors.namespaces` | Enable the namespaces collector. | `true` | -| `collectors.networkpolicies` | Enable the networkpolicies collector. | `true` | -| `collectors.nodes` | Enable the nodes collector. | `true` | -| `collectors.persistentvolumeclaims` | Enable the persistentvolumeclaims collector. | `true` | -| `collectors.persistentvolumes` | Enable the persistentvolumes collector. | `true` | -| `collectors.poddisruptionbudgets` | Enable the poddisruptionbudgets collector. | `true` | -| `collectors.pods` | Enable the pods collector. | `true` | -| `collectors.replicasets` | Enable the replicasets collector. | `true` | -| `collectors.replicationcontrollers` | Enable the replicationcontrollers collector. | `true` | -| `collectors.resourcequotas` | Enable the resourcequotas collector. | `true` | -| `collectors.secrets` | Enable the secrets collector. | `true` | -| `collectors.services` | Enable the services collector. | `true` | -| `collectors.statefulsets` | Enable the statefulsets collector. | `true` | -| `collectors.storageclasses` | Enable the storageclasses collector. | `true` | -| `collectors.validatingwebhookconfigurations` | Enable the validatingwebhookconfigurations collector. | `true` | -| `collectors.verticalpodautoscalers` | Enable the verticalpodautoscalers collector. | `true` | -| `collectors.volumeattachments` | Enable the volumeattachments collector. 
| `true` | -| `prometheus.monitor.enabled` | Set this to `true` to create ServiceMonitor for Prometheus operator | `false` | -| `prometheus.monitor.additionalLabels` | Additional labels that can be used so ServiceMonitor will be discovered by Prometheus | `{}` | -| `prometheus.monitor.namespace` | Namespace where servicemonitor resource should be created | `the same namespace as kube-state-metrics` | -| `prometheus.monitor.honorLabels` | Honor metric labels | `false` | -| `namespaceOverride` | Override the deployment namespace | `""` (`Release.Namespace`) | -| `kubeTargetVersionOverride` | Override the k8s version of the target cluster | `""` | -| `kubeconfig.enabled` | Adds --kubeconfig arg to container at startup | `""` | -| `kubeconfig.secret` | Base64 encoded kubeconfig file | `""` | +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments: + +```console +helm show values kube-state-metrics/kube-state-metrics +``` + +You may also `helm show values` on this chart's [dependencies](#dependencies) for additional options. diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrolebinding.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrolebinding.yaml index 160db8b..af158c5 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrolebinding.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrolebinding.yaml @@ -1,4 +1,4 @@ -{{- if .Values.rbac.create -}} +{{- if and .Values.rbac.create .Values.rbac.useClusterRole -}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -11,7 +11,11 @@ metadata: roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole +{{- if .Values.rbac.useExistingRole }} + name: {{ .Values.rbac.useExistingRole }} +{{- else }} name: {{ template "kube-state-metrics.fullname" . }} +{{- end }} subjects: - kind: ServiceAccount name: {{ template "kube-state-metrics.fullname" . }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/deployment.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/deployment.yaml index 8f491ec..5f6b644 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/deployment.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/deployment.yaml @@ -12,6 +12,7 @@ metadata: helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" app.kubernetes.io/instance: "{{ .Release.Name }}" app.kubernetes.io/managed-by: "{{ .Release.Service }}" + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" {{- if .Values.customLabels }} {{ toYaml .Values.customLabels | indent 4 }} {{- end }} @@ -62,6 +63,11 @@ spec: fieldPath: metadata.namespace {{- end }} args: +{{ if .Values.extraArgs }} + {{- range .Values.extraArgs }} + - {{ . 
}} + {{- end }} +{{ end }} {{ if .Values.collectors.certificatesigningrequests }} - --collectors=certificatesigningrequests {{ end }} @@ -147,7 +153,7 @@ spec: - --collectors=volumeattachments {{ end }} {{ if .Values.namespace }} - - --namespace={{ .Values.namespace }} + - --namespace={{ .Values.namespace | join "," }} {{ end }} {{ if .Values.autosharding.enabled }} - --pod=$(POD_NAME) @@ -156,6 +162,10 @@ spec: {{ if .Values.kubeconfig.enabled }} - --kubeconfig=/opt/k8s/.kube/config {{ end }} +{{ if .Values.selfMonitor.telemetryHost }} + - --telemetry-host={{ .Values.selfMonitor.telemetryHost }} +{{ end }} + - --telemetry-port=8081 {{- if .Values.kubeconfig.enabled }} volumeMounts: - name: kubeconfig diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/pdb.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/pdb.yaml index 6adb50d..d3ef810 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/pdb.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/pdb.yaml @@ -9,9 +9,12 @@ metadata: helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" app.kubernetes.io/instance: "{{ .Release.Name }}" app.kubernetes.io/managed-by: "{{ .Release.Service }}" +{{- if .Values.customLabels }} +{{ toYaml .Values.customLabels | indent 4 }} +{{- end }} spec: selector: matchLabels: app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }} {{ toYaml .Values.podDisruptionBudget | indent 2 }} -{{- end -}} \ No newline at end of file +{{- end -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrole.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml similarity index 61% rename from charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrole.yaml rename to charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml index a9198b8..6259d2f 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/clusterrole.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/role.yaml @@ -1,177 +1,189 @@ -{{- if .Values.rbac.create -}} +{{- if and (eq $.Values.rbac.create true) (not .Values.rbac.useExistingRole) -}} +{{- if eq .Values.rbac.useClusterRole false }} +{{- range (split "," $.Values.namespace) }} +{{- end }} +{{- end -}} +--- apiVersion: rbac.authorization.k8s.io/v1 +{{- if eq .Values.rbac.useClusterRole false }} +kind: Role +{{- else }} kind: ClusterRole +{{- end }} metadata: labels: - app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }} - helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - app.kubernetes.io/instance: {{ .Release.Name }} - name: {{ template "kube-state-metrics.fullname" . }} + app.kubernetes.io/name: {{ template "kube-state-metrics.name" $ }} + helm.sh/chart: {{ $.Chart.Name }}-{{ $.Chart.Version }} + app.kubernetes.io/managed-by: {{ $.Release.Service }} + app.kubernetes.io/instance: {{ $.Release.Name }} + name: {{ template "kube-state-metrics.fullname" $ }} +{{- if eq .Values.rbac.useClusterRole false }} + namespace: {{ . 
}} +{{- end }} rules: -{{ if .Values.collectors.certificatesigningrequests }} +{{ if $.Values.collectors.certificatesigningrequests }} - apiGroups: ["certificates.k8s.io"] resources: - certificatesigningrequests verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.configmaps }} +{{ if $.Values.collectors.configmaps }} - apiGroups: [""] resources: - configmaps verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.cronjobs }} +{{ if $.Values.collectors.cronjobs }} - apiGroups: ["batch"] resources: - cronjobs verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.daemonsets }} +{{ if $.Values.collectors.daemonsets }} - apiGroups: ["extensions", "apps"] resources: - daemonsets verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.deployments }} +{{ if $.Values.collectors.deployments }} - apiGroups: ["extensions", "apps"] resources: - deployments verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.endpoints }} +{{ if $.Values.collectors.endpoints }} - apiGroups: [""] resources: - endpoints verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.horizontalpodautoscalers }} +{{ if $.Values.collectors.horizontalpodautoscalers }} - apiGroups: ["autoscaling"] resources: - horizontalpodautoscalers verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.ingresses }} +{{ if $.Values.collectors.ingresses }} - apiGroups: ["extensions", "networking.k8s.io"] resources: - ingresses verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.jobs }} +{{ if $.Values.collectors.jobs }} - apiGroups: ["batch"] resources: - jobs verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.limitranges }} +{{ if $.Values.collectors.limitranges }} - apiGroups: [""] resources: - limitranges verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.mutatingwebhookconfigurations }} +{{ if $.Values.collectors.mutatingwebhookconfigurations }} - apiGroups: ["admissionregistration.k8s.io"] resources: - mutatingwebhookconfigurations verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.namespaces }} +{{ if $.Values.collectors.namespaces }} - apiGroups: [""] resources: - namespaces verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.networkpolicies }} +{{ if $.Values.collectors.networkpolicies }} - apiGroups: ["networking.k8s.io"] resources: - networkpolicies verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.nodes }} +{{ if $.Values.collectors.nodes }} - apiGroups: [""] resources: - nodes verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.persistentvolumeclaims }} +{{ if $.Values.collectors.persistentvolumeclaims }} - apiGroups: [""] resources: - persistentvolumeclaims verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.persistentvolumes }} +{{ if $.Values.collectors.persistentvolumes }} - apiGroups: [""] resources: - persistentvolumes verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.poddisruptionbudgets }} +{{ if $.Values.collectors.poddisruptionbudgets }} - apiGroups: ["policy"] resources: - poddisruptionbudgets verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.pods }} +{{ if $.Values.collectors.pods }} - apiGroups: [""] resources: - pods verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.replicasets }} +{{ if $.Values.collectors.replicasets }} - apiGroups: ["extensions", "apps"] resources: - replicasets verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.replicationcontrollers }} +{{ if $.Values.collectors.replicationcontrollers }} - apiGroups: [""] resources: - 
replicationcontrollers verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.resourcequotas }} +{{ if $.Values.collectors.resourcequotas }} - apiGroups: [""] resources: - resourcequotas verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.secrets }} +{{ if $.Values.collectors.secrets }} - apiGroups: [""] resources: - secrets verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.services }} +{{ if $.Values.collectors.services }} - apiGroups: [""] resources: - services verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.statefulsets }} +{{ if $.Values.collectors.statefulsets }} - apiGroups: ["apps"] resources: - statefulsets verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.storageclasses }} +{{ if $.Values.collectors.storageclasses }} - apiGroups: ["storage.k8s.io"] resources: - storageclasses verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.validatingwebhookconfigurations }} +{{ if $.Values.collectors.validatingwebhookconfigurations }} - apiGroups: ["admissionregistration.k8s.io"] resources: - validatingwebhookconfigurations verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.volumeattachments }} +{{ if $.Values.collectors.volumeattachments }} - apiGroups: ["storage.k8s.io"] resources: - volumeattachments verbs: ["list", "watch"] {{ end -}} -{{ if .Values.collectors.verticalpodautoscalers }} +{{ if $.Values.collectors.verticalpodautoscalers }} - apiGroups: ["autoscaling.k8s.io"] resources: - verticalpodautoscalers diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml new file mode 100644 index 0000000..89bb41b --- /dev/null +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/rolebinding.yaml @@ -0,0 +1,27 @@ +{{- if and (eq .Values.rbac.create true) (eq .Values.rbac.useClusterRole false) -}} +{{- range (split "," $.Values.namespace) }} +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/name: {{ template "kube-state-metrics.name" $ }} + helm.sh/chart: {{ $.Chart.Name }}-{{ $.Chart.Version }} + app.kubernetes.io/managed-by: {{ $.Release.Service }} + app.kubernetes.io/instance: {{ $.Release.Name }} + name: {{ template "kube-state-metrics.fullname" $ }} + namespace: {{ . 
}} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role +{{- if (not $.Values.rbac.useExistingRole) }} + name: {{ template "kube-state-metrics.fullname" $ }} +{{- else }} + name: {{ $.Values.rbac.useExistingRole }} +{{- end }} +subjects: +- kind: ServiceAccount + name: {{ template "kube-state-metrics.fullname" $ }} + namespace: {{ template "kube-state-metrics.namespace" $ }} +{{- end -}} +{{- end -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/service.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/service.yaml index 5dacf52..4f8e4a4 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/service.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/service.yaml @@ -28,6 +28,12 @@ spec: nodePort: {{ .Values.service.nodePort }} {{- end }} targetPort: 8080 + {{ if .Values.selfMonitor.enabled }} + - name: "metrics" + protocol: TCP + port: {{ .Values.selfMonitor.telemetryPort | default 8081 }} + targetPort: 8081 + {{ end }} {{- if .Values.service.loadBalancerIP }} loadBalancerIP: "{{ .Values.service.loadBalancerIP }}" {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/servicemonitor.yaml index 54cde36..7d1cd7a 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/templates/servicemonitor.yaml @@ -12,6 +12,9 @@ metadata: {{- if .Values.prometheus.monitor.additionalLabels }} {{ toYaml .Values.prometheus.monitor.additionalLabels | indent 4 }} {{- end }} +{{- if .Values.customLabels }} +{{ toYaml .Values.customLabels | indent 4 }} +{{- end }} spec: selector: matchLabels: @@ -22,4 +25,10 @@ spec: {{- if .Values.prometheus.monitor.honorLabels }} honorLabels: true {{- end }} + {{ if .Values.selfMonitor.enabled }} + - port: metrics + {{- if .Values.prometheus.monitor.honorLabels }} + honorLabels: true + {{- end }} + {{ end }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/values.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/values.yaml index 3edd4c8..9522cfe 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/values.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/kube-state-metrics/values.yaml @@ -1,8 +1,8 @@ # Default values for kube-state-metrics. prometheusScrape: true image: - repository: quay.io/coreos/kube-state-metrics - tag: v1.9.7 + repository: k8s.gcr.io/kube-state-metrics/kube-state-metrics + tag: v1.9.8 pullPolicy: IfNotPresent imagePullSecrets: [] @@ -17,6 +17,11 @@ autosharding: replicas: 1 +# List of additional cli arguments to configure kube-state-metrics +# for example: --enable-gzip-encoding, --log-file, etc. 
+# all the possible args can be found here: https://github.com/kubernetes/kube-state-metrics/blob/master/docs/cli-arguments.md +extraArgs: [] + service: port: 8080 # Default to clusterIP for backward compatibility @@ -33,6 +38,12 @@ rbac: # If true, create & use RBAC resources create: true + # Set to a rolename to use an existing role - skipping role creation - but still creating the serviceaccount and rolebinding to the rolename set here. + # useExistingRole: your-existing-role + + # If set to false - run without cluster-admin privileges - ONLY works if namespace is also set (if useExistingRole is set, this name is used as the ClusterRole or Role to bind to) + useClusterRole: true + serviceAccount: # Specifies whether a ServiceAccount should be created, require rbac true create: true @@ -159,3 +170,10 @@ resources: {} ## For example: kubeTargetVersionOverride: 1.14.9 ## kubeTargetVersionOverride: "" + +# Enable self-metrics configuration for the Service and ServiceMonitor +# Default values for the telemetry configuration can be overridden +selfMonitor: + enabled: false + # telemetryHost: 0.0.0.0 + # telemetryPort: 8081 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml index 6594547..f35460b 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml @@ -10,7 +10,8 @@ maintainers: - email: gianrubio@gmail.com name: gianrubio - name: vsliouniaev +- name: bismarck name: prometheus-node-exporter sources: - https://github.com/prometheus/node_exporter/ -version: 1.12.0 +version: 1.14.2 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml index 2787dae..cd6f65f 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/daemonset.yaml @@ -43,6 +43,9 @@ spec: {{- if .Values.extraArgs }} {{ toYaml .Values.extraArgs | indent 12 }} {{- end }} + {{- with .Values.containerSecurityContext }} + securityContext: {{ toYaml .
| nindent 12 }} + {{- end }} env: - name: HOST_IP {{- if .Values.service.listenOnAllInterfaces }} @@ -55,7 +58,7 @@ spec: {{- end }} ports: - name: metrics - containerPort: {{ .Values.service.targetPort }} + containerPort: {{ .Values.service.port }} protocol: TCP livenessProbe: httpGet: @@ -100,6 +103,12 @@ spec: - name: {{ $mount.name }} mountPath: {{ $mount.mountPath }} {{- end }} + {{- if .Values.secrets }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + {{- end }} + {{- end }} {{- end }} {{- if .Values.sidecars }} {{ toYaml .Values.sidecars | indent 8 }} @@ -157,3 +166,10 @@ spec: name: {{ $mount.name }} {{- end }} {{- end }} + {{- if .Values.secrets }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ $mount.name }} + secret: + secretName: {{ $mount.name }} + {{- end }} + {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/monitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/monitor.yaml index 4e31ba3..2f7b6ae 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/monitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/monitor.yaml @@ -15,6 +15,13 @@ spec: release: {{ .Release.Name }} endpoints: - port: metrics + scheme: {{ $.Values.prometheus.monitor.scheme }} + {{- if $.Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ $.Values.prometheus.monitor.bearerTokenFile }} + {{- end }} + {{- if $.Values.prometheus.monitor.tlsConfig }} + tlsConfig: {{ toYaml $.Values.prometheus.monitor.tlsConfig | nindent 8 }} + {{- end }} {{- if .Values.prometheus.monitor.scrapeTimeout }} scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/serviceaccount.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/serviceaccount.yaml index bd1c223..07e9f0d 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/serviceaccount.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/serviceaccount.yaml @@ -10,6 +10,8 @@ metadata: chart: {{ template "prometheus-node-exporter.chart" . }} release: "{{ .Release.Name }}" heritage: "{{ .Release.Service }}" + annotations: +{{ toYaml .Values.serviceAccount.annotations | indent 4 }} imagePullSecrets: {{ toYaml .Values.serviceAccount.imagePullSecrets | indent 2 }} {{- end -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml index 7edd893..4be3f9c 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml @@ -20,6 +20,9 @@ prometheus: enabled: false additionalLabels: {} namespace: "" + scheme: http + bearerTokenFile: + tlsConfig: {} relabelings: [] scrapeTimeout: 10s @@ -48,6 +51,7 @@ serviceAccount: # The name of the ServiceAccount to use. 
# If not set and create is true, a name is generated using the fullname template name: + annotations: {} imagePullSecrets: [] securityContext: @@ -56,6 +60,11 @@ securityContext: runAsNonRoot: true runAsUser: 65534 +containerSecurityContext: {} + # capabilities: + # add: + # - SYS_TIME + rbac: ## If true, create & use RBAC resources ## @@ -84,7 +93,9 @@ affinity: {} # - target-host-name # Annotations to be added to node exporter pods -podAnnotations: {} +podAnnotations: + # Fix for very slow GKE cluster upgrades + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" # Extra labels to be added to node exporter pods podLabels: {} @@ -122,7 +133,9 @@ extraHostVolumeMounts: [] configmaps: [] # - name: # mountPath: - +secrets: [] +# - name: +# mountPath: ## Override the deployment namespace ## namespaceOverride: "" diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml index fb1ad5f..a279253 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.44.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml --- apiVersion: apiextensions.k8s.io/v1 @@ -50,6 +50,7 @@ spec: properties: name: description: Label to match. + minLength: 1 type: string regex: description: Whether to match on equality (false) or regular-expression (true). @@ -59,7 +60,6 @@ spec: type: string required: - name - - value type: object type: array targetMatch: @@ -69,6 +69,7 @@ spec: properties: name: description: Label to match. + minLength: 1 type: string regex: description: Whether to match on equality (false) or regular-expression (true). @@ -78,7 +79,6 @@ spec: type: string required: - name - - value type: object type: array type: object @@ -94,9 +94,10 @@ spec: description: EmailConfig configures notifications via Email. properties: authIdentity: + description: The identity to use for authentication. type: string authPassword: - description: SecretKeySelector selects a key of a Secret. + description: The secret's key that contains the password to use for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. properties: key: description: The key of the secret to select from. Must be a valid secret key. @@ -111,7 +112,7 @@ spec: - key type: object authSecret: - description: SecretKeySelector selects a key of a Secret. + description: The secret's key that contains the CRAM-MD5 secret. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. properties: key: description: The key of the secret to select from. Must be a valid secret key. @@ -126,7 +127,7 @@ spec: - key type: object authUsername: - description: SMTP authentication information. + description: The username to use for authentication. type: string from: description: The sender address. @@ -138,6 +139,7 @@ spec: properties: key: description: Key of the tuple. + minLength: 1 type: string value: description: Value of the tuple. 
@@ -265,6 +267,7 @@ spec: type: array name: description: Name of the receiver. Must be unique across all items from the list. + minLength: 1 type: string opsgenieConfigs: description: List of OpsGenie configurations. @@ -299,6 +302,7 @@ spec: properties: key: description: Key of the tuple. + minLength: 1 type: string value: description: Value of the tuple. @@ -469,7 +473,7 @@ spec: responders: description: List of responders responsible for notifications. items: - description: OpsGenieConfigResponder defines a responder to an incident. One of id, name or username has to be defined. + description: OpsGenieConfigResponder defines a responder to an incident. One of `id`, `name` or `username` has to be defined. properties: id: description: ID of the responder. @@ -479,10 +483,13 @@ spec: type: string type: description: Type of responder. + minLength: 1 type: string username: description: Username of the responder. type: string + required: + - type type: object type: array sendResolved: @@ -523,6 +530,7 @@ spec: properties: key: description: Key of the tuple. + minLength: 1 type: string value: description: Value of the tuple. @@ -904,7 +912,7 @@ spec: description: Notification title. type: string token: - description: Your registered application’s API token, see https://pushover.net/apps + description: The secret's key that contains the registered application’s API token, see https://pushover.net/apps. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. properties: key: description: The key of the secret to select from. Must be a valid secret key. @@ -925,7 +933,7 @@ spec: description: A title for supplementary URL, otherwise just the URL is shown type: string userKey: - description: The recipient user’s user key. + description: The secret's key that contains the recipient user’s user key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. properties: key: description: The key of the secret to select from. Must be a valid secret key. @@ -959,6 +967,7 @@ spec: okText: type: string text: + minLength: 1 type: string title: type: string @@ -970,8 +979,10 @@ spec: style: type: string text: + minLength: 1 type: string type: + minLength: 1 type: string url: type: string @@ -1014,8 +1025,10 @@ spec: short: type: boolean title: + minLength: 1 type: string value: + minLength: 1 type: string required: - title @@ -1210,7 +1223,7 @@ spec: description: VictorOpsConfig configures notifications via VictorOps. See https://prometheus.io/docs/alerting/latest/configuration/#victorops_config properties: apiKey: - description: The API key to use when talking to the VictorOps API. + description: The secret's key that contains the API key to use when talking to the VictorOps API. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. properties: key: description: The key of the secret to select from. Must be a valid secret key. @@ -1234,6 +1247,7 @@ spec: properties: key: description: Key of the tuple. + minLength: 1 type: string value: description: Value of the tuple. @@ -1410,8 +1424,6 @@ spec: stateMessage: description: Contains long explanation of the alerted problem. type: string - required: - - routingKey type: object type: array webhookConfigs: @@ -1569,8 +1581,9 @@ spec: type: object type: object maxAlerts: - description: Maximum number of alerts to be sent per webhook message. 
+ description: Maximum number of alerts to be sent per webhook message. When 0, all alerts are included. format: int32 + minimum: 0 type: integer sendResolved: description: Whether or not to notify about resolved alerts. @@ -1793,7 +1806,7 @@ spec: type: object type: array route: - description: The Alertmanager route definition for alerts matching the resource’s namespace. It will be added to the generated Alertmanager configuration as a first-level route. + description: The Alertmanager route definition for alerts matching the resource’s namespace. If present, it will be added to the generated Alertmanager configuration as a first-level route. properties: continue: description: Boolean indicating whether an alert should continue matching subsequent sibling nodes. It will always be overridden to true for the first-level route by the Prometheus operator. @@ -1816,6 +1829,7 @@ spec: properties: name: description: Label to match. + minLength: 1 type: string regex: description: Whether to match on equality (false) or regular-expression (true). @@ -1825,11 +1839,10 @@ spec: type: string required: - name - - value type: object type: array receiver: - description: Name of the receiver for this route. If present, it should be listed in the `receivers` field. The field can be omitted only for nested routes otherwise it is mandatory. + description: Name of the receiver for this route. If not empty, it should be listed in the `receivers` field. type: string repeatInterval: description: How long to wait before repeating the last notification. Must match the regular expression `[0-9]+(ms|s|m|h)` (milliseconds seconds minutes hours). diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml index 86a6b98..7a4ec17 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.44.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml --- apiVersion: apiextensions.k8s.io/v1 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml index 630465b..95fbafb 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.44.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml --- apiVersion: apiextensions.k8s.io/v1 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-probes.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-probes.yaml index 41a1b6f..5ef8405 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-probes.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-probes.yaml @@ -1,4 +1,4 @@ -# 
https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.44.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml --- apiVersion: apiextensions.k8s.io/v1 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml index fd43ebc..6a82bc5 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.44.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml --- apiVersion: apiextensions.k8s.io/v1 @@ -2258,7 +2258,7 @@ spec: type: string type: object podMonitorNamespaceSelector: - description: Namespaces to be selected for PodMonitor discovery. If nil, only check own namespace. + description: Namespace's labels to match for PodMonitor discovery. If nil, only check own namespace. properties: matchExpressions: description: matchExpressions is a list of label selector requirements. The requirements are ANDed. @@ -2849,7 +2849,7 @@ spec: description: Time duration Prometheus shall retain data for. Default is '24h', and must match the regular expression `[0-9]+(ms|s|m|h|d|w|y)` (milliseconds seconds minutes hours days weeks years). type: string retentionSize: - description: Maximum amount of disk space used by blocks. + description: 'Maximum amount of disk space used by blocks. Supported units: B, KB, MB, GB, TB, PB, EB. Ex: `512MB`.' type: string routePrefix: description: The route prefix Prometheus registers HTTP handlers for. This is useful, if using ExternalURL and a proxy is rewriting HTTP routes of a request, and the actual ExternalURL is still true, but the server serves requests under a different route prefix. For example for use with `kubectl proxy`. @@ -3019,7 +3019,7 @@ spec: description: ServiceAccountName is the name of the ServiceAccount to use to run the Prometheus Pods. type: string serviceMonitorNamespaceSelector: - description: Namespaces to be selected for ServiceMonitor discovery. If nil, only check own namespace. + description: Namespace's labels to match for ServiceMonitor discovery. If nil, only check own namespace. properties: matchExpressions: description: matchExpressions is a list of label selector requirements. The requirements are ANDed. @@ -3456,6 +3456,9 @@ spec: required: - key type: object + tracingConfigFile: + description: TracingConfigFile specifies the path of the tracing configuration file. When used alongside TracingConfig, TracingConfigFile takes precedence. + type: string version: description: Version describes the version of Thanos to use.
type: string diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml index 02759cd..8c0776c 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.44.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml --- apiVersion: apiextensions.k8s.io/v1 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml index f5d989d..a65be71 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.44.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml --- apiVersion: apiextensions.k8s.io/v1 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml index f647e72..8fe6e81 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.44.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml --- apiVersion: apiextensions.k8s.io/v1 diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml index 78d0f7c..bbdbc56 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml @@ -19,7 +19,7 @@ spec: listenLocal: {{ .Values.alertmanager.alertmanagerSpec.listenLocal }} serviceAccountName: {{ template "kube-prometheus-stack.alertmanager.serviceAccountName" . }} {{- if .Values.alertmanager.alertmanagerSpec.externalUrl }} - externalUrl: "{{ .Values.alertmanager.alertmanagerSpec.externalUrl }}" + externalUrl: "{{ tpl .Values.alertmanager.alertmanagerSpec.externalUrl . }}" {{- else if and .Values.alertmanager.ingress.enabled .Values.alertmanager.ingress.hosts }} externalUrl: "http://{{ tpl (index .Values.alertmanager.ingress.hosts 0) . 
}}{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}" {{- else }} @@ -47,10 +47,14 @@ spec: {{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfigSelector }} alertmanagerConfigSelector: {{ toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfigSelector | indent 4}} +{{ else }} + alertmanagerConfigSelector: {} {{- end }} {{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfigNamespaceSelector }} alertmanagerConfigNamespaceSelector: {{ toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfigNamespaceSelector | indent 4}} +{{ else }} + alertmanagerConfigNamespaceSelector: {} {{- end }} {{- if .Values.alertmanager.alertmanagerSpec.resources }} resources: @@ -81,9 +85,9 @@ spec: requiredDuringSchedulingIgnoredDuringExecution: - topologyKey: {{ .Values.alertmanager.alertmanagerSpec.podAntiAffinityTopologyKey }} labelSelector: - matchLabels: - app: alertmanager - alertmanager: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + matchExpressions: + - {key: app, operator: In, values: [alertmanager]} + - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]} {{- else if eq .Values.alertmanager.alertmanagerSpec.podAntiAffinity "soft" }} podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: @@ -91,15 +95,19 @@ spec: podAffinityTerm: topologyKey: {{ .Values.alertmanager.alertmanagerSpec.podAntiAffinityTopologyKey }} labelSelector: - matchLabels: - app: alertmanager - alertmanager: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + matchExpressions: + - {key: app, operator: In, values: [alertmanager]} + - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]} {{- end }} {{- end }} {{- if .Values.alertmanager.alertmanagerSpec.tolerations }} tolerations: {{ toYaml .Values.alertmanager.alertmanagerSpec.tolerations | indent 4 }} {{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.alertmanager.alertmanagerSpec.topologySpreadConstraints | indent 4 }} +{{- end }} {{- if .Values.global.imagePullSecrets }} imagePullSecrets: {{ toYaml .Values.global.imagePullSecrets | indent 4 }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/ingress.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/ingress.yaml index a87ce89..50fab14 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/ingress.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/ingress.yaml @@ -1,4 +1,5 @@ {{- if and .Values.alertmanager.enabled .Values.alertmanager.ingress.enabled }} +{{- $pathType := .Values.alertmanager.ingress.pathType | default "" }} {{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} {{- $servicePort := .Values.alertmanager.service.port -}} {{- $routePrefix := list .Values.alertmanager.alertmanagerSpec.routePrefix }} @@ -23,7 +24,7 @@ metadata: {{- end }} {{ include "kube-prometheus-stack.labels" . 
| indent 4 }} spec: - {{- if or (.Capabilities.APIVersions.Has "networking.k8s.io/v1/IngressClass") (.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1/IngressClass") }} + {{- if or (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1") }} {{- if .Values.alertmanager.ingress.ingressClassName }} ingressClassName: {{ .Values.alertmanager.ingress.ingressClassName }} {{- end }} @@ -36,6 +37,9 @@ spec: paths: {{- range $p := $paths }} - path: {{ tpl $p $ }} + {{- if $pathType }} + pathType: {{ $pathType }} + {{- end }} backend: serviceName: {{ $serviceName }} servicePort: {{ $servicePort }} @@ -46,6 +50,9 @@ spec: paths: {{- range $p := $paths }} - path: {{ tpl $p $ }} + {{- if $pathType }} + pathType: {{ $pathType }} + {{- end }} backend: serviceName: {{ $serviceName }} servicePort: {{ $servicePort }} @@ -53,6 +60,6 @@ spec: {{- end -}} {{- if .Values.alertmanager.ingress.tls }} tls: -{{ toYaml .Values.alertmanager.ingress.tls | indent 4 }} +{{ tpl (toYaml .Values.alertmanager.ingress.tls | indent 4) . }} {{- end -}} {{- end -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/ingressperreplica.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/ingressperreplica.yaml index 6aef97d..3d673b2 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/ingressperreplica.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/alertmanager/ingressperreplica.yaml @@ -1,4 +1,5 @@ {{- if and .Values.alertmanager.enabled .Values.alertmanager.servicePerReplica.enabled .Values.alertmanager.ingressPerReplica.enabled }} +{{- $pathType := .Values.alertmanager.ingressPerReplica.pathType | default "" }} {{- $count := .Values.alertmanager.alertmanagerSpec.replicas | int -}} {{- $servicePort := .Values.alertmanager.service.port -}} {{- $ingressValues := .Values.alertmanager.ingressPerReplica -}} @@ -29,7 +30,7 @@ items: {{ toYaml $ingressValues.annotations | indent 8 }} {{- end }} spec: - {{- if or ($.Capabilities.APIVersions.Has "networking.k8s.io/v1/IngressClass") ($.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1/IngressClass") }} + {{- if or ($.Capabilities.APIVersions.Has "networking.k8s.io/v1") ($.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1") }} {{- if $ingressValues.ingressClassName }} ingressClassName: {{ $ingressValues.ingressClassName }} {{- end }} @@ -40,6 +41,9 @@ items: paths: {{- range $p := $ingressValues.paths }} - path: {{ tpl $p $ }} + {{- if $pathType }} + pathType: {{ $pathType }} + {{- end }} backend: serviceName: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }} servicePort: {{ $servicePort }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml index f09de5d..5b723b2 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/exporters/kube-state-metrics/serviceMonitor.yaml @@ -25,6 +25,10 @@ spec: {{- end }} selector: matchLabels: +{{- if .Values.kubeStateMetrics.serviceMonitor.selectorOverride }} +{{ toYaml .Values.kubeStateMetrics.serviceMonitor.selectorOverride | indent 6 }} +{{ else }} app.kubernetes.io/name: kube-state-metrics 
app.kubernetes.io/instance: "{{ $.Release.Name }}" {{- end }} +{{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml index de904dd..db62d53 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml @@ -16,6 +16,7 @@ data: datasource.yaml: |- apiVersion: 1 datasources: +{{- $scrapeInterval := .Values.grafana.sidecar.datasources.defaultDatasourceScrapeInterval | default .Values.prometheus.prometheusSpec.scrapeInterval | default "30s" }} {{- if .Values.grafana.sidecar.datasources.defaultDatasourceEnabled }} - name: Prometheus type: prometheus @@ -23,7 +24,7 @@ data: access: proxy isDefault: true jsonData: - timeInterval: {{ .Values.prometheus.prometheusSpec.scrapeInterval | default "30s" }} + timeInterval: {{ $scrapeInterval }} {{- if .Values.grafana.sidecar.datasources.createPrometheusReplicasDatasources }} {{- range until (int .Values.prometheus.prometheusSpec.replicas) }} - name: Prometheus-{{ . }} @@ -32,7 +33,7 @@ data: access: proxy isDefault: false jsonData: - timeInterval: {{ .Values.prometheus.prometheusSpec.scrapeInterval | default "30s" }} + timeInterval: {{ $scrapeInterval }} {{- end }} {{- end }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml index 93bf909..1d1c3e9 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml @@ -1823,7 +1823,7 @@ data: }, "datasource": "$datasource", - "hide": 2, + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, "includeAll": false, "label": null, "multi": false, diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml index 0595cb4..66768d9 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml @@ -1,5 +1,5 @@ {{- /* -Generated from 'etcd' from https://raw.githubusercontent.com/etcd-io/etcd/master/Documentation/op-guide/grafana.json +Generated from 'etcd' from https://raw.githubusercontent.com/etcd-io/website/master/content/docs/current/op-guide/grafana.json Do not change in-place! 
In order to change this file first read following link: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml index 5f1b2a2..7238299 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml @@ -191,7 +191,7 @@ data: "tableColumn": "", "targets": [ { - "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -275,7 +275,7 @@ data: "tableColumn": "", "targets": [ { - "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml index e814ba7..c131e68 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml @@ -1293,7 +1293,7 @@ data: }, "datasource": "$datasource", - "hide": 2, + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, "includeAll": false, "label": null, "multi": false, diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml index a526290..097d7f5 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml @@ -1533,7 +1533,7 @@ data: }, "datasource": "$datasource", - "hide": 2, + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, "includeAll": false, "label": null, "multi": false, diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml index 82b821a..62ab619 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml +++ 
b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml @@ -92,7 +92,7 @@ data: "timeShift": null, "title": "CPU Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -178,7 +178,7 @@ data: "timeShift": null, "title": "CPU Saturation (load1 per CPU)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -276,7 +276,7 @@ data: "timeShift": null, "title": "Memory Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -362,7 +362,7 @@ data: "timeShift": null, "title": "Memory Saturation (Major Page Faults)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -476,7 +476,7 @@ data: "timeShift": null, "title": "Net Utilisation (Bytes Receive/Transmit)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -578,7 +578,7 @@ data: "timeShift": null, "title": "Net Saturation (Drops Receive/Transmit)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -676,7 +676,7 @@ data: "timeShift": null, "title": "Disk IO Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -762,7 +762,7 @@ data: "timeShift": null, "title": "Disk IO Saturation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -860,7 +860,7 @@ data: "timeShift": null, "title": "Disk Space Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -956,9 +956,9 @@ data: "30d" ] }, - "timezone": "UTC", + "timezone": "utc", "title": "USE Method / Cluster", - "uid": "3e97d1d02672cdd0861f4c97c64f89b2", + "uid": "", "version": 0 } {{- end }} \ No newline at end of file diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml index 9cdfa64..cd21961 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml @@ -92,7 +92,7 @@ data: "timeShift": null, "title": "CPU Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -178,7 +178,7 @@ data: "timeShift": null, "title": "CPU Saturation (Load1 per CPU)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -276,7 +276,7 @@ data: "timeShift": null, "title": "Memory Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -362,7 +362,7 @@ data: "timeShift": null, "title": "Memory Saturation (Major Page Faults)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -476,7 +476,7 @@ data: "timeShift": null, "title": "Net Utilisation (Bytes Receive/Transmit)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -578,7 +578,7 @@ data: "timeShift": null, "title": "Net Saturation (Drops Receive/Transmit)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -676,7 +676,7 @@ data: "timeShift": null, "title": "Disk IO Utilisation", 
"tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -762,7 +762,7 @@ data: "timeShift": null, "title": "Disk IO Saturation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -860,7 +860,7 @@ data: "timeShift": null, "title": "Disk Space Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -983,9 +983,9 @@ data: "30d" ] }, - "timezone": "UTC", + "timezone": "utc", "title": "USE Method / Node", - "uid": "fac67cfbe174d3ef53eb473d73d9212f", + "uid": "", "version": 0 } {{- end }} \ No newline at end of file diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml index b2935ae..2a29fc0 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml @@ -107,7 +107,7 @@ data: "timeShift": null, "title": "CPU Usage", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -221,7 +221,7 @@ data: "timeShift": null, "title": "Load Average", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -348,7 +348,7 @@ data: "timeShift": null, "title": "Memory Usage", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -451,9 +451,6 @@ data: ], "thresholds": "80, 90", "title": "Memory Usage", - "tooltip": { - "shared": false - }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -562,7 +559,7 @@ data: "timeShift": null, "title": "Disk I/O", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -669,7 +666,7 @@ data: "timeShift": null, "title": "Disk Space Usage", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -776,7 +773,7 @@ data: "timeShift": null, "title": "Network Received", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -870,7 +867,7 @@ data: "timeShift": null, "title": "Network Transmitted", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -993,9 +990,8 @@ data: "30d" ] }, - "timezone": "UTC", + "timezone": "browser", "title": "Nodes", - "uid": "fa49a4706d07a042595b664c87fb33ea", "version": 0 } {{- end }} \ No newline at end of file diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml index 4ac20ce..06fb315 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml @@ -207,7 +207,7 @@ data: "tableColumn": "", "targets": [ { - "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", 
persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "expr": "max without(instance,node) (\n(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -404,7 +404,7 @@ data: "tableColumn": "", "targets": [ { - "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "expr": "max without(instance,node) (\nkubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml index 76c2c6e..95abda4 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml @@ -1025,7 +1025,7 @@ data: }, "datasource": "$datasource", - "hide": 2, + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, "includeAll": false, "label": null, "multi": false, diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml index 950443a..b33b738 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml @@ -40,7 +40,7 @@ data: "links": [ ], - "refresh": "", + "refresh": "60s", "rows": [ { "collapse": false, @@ -92,7 +92,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n", + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) 
(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", @@ -185,7 +185,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n", + "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n, 0)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", @@ -291,7 +291,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n", + "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", @@ -876,7 +876,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}", + "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", @@ -1181,7 +1181,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", @@ -1274,7 +1274,7 @@ data: "steppedLine": false, "targets": [ { - "expr": 
"rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", @@ -1367,7 +1367,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", @@ -1520,7 +1520,7 @@ data: "schemaVersion": 14, "style": "dark", "tags": [ - + "prometheus-mixin" ], "templating": { "list": [ @@ -1664,7 +1664,7 @@ data: ] }, "timezone": "browser", - "title": "Prometheus Remote Write", + "title": "Prometheus / Remote Write", "version": 0 } {{- end }} \ No newline at end of file diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml index dfd3f3d..7095fb7 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml @@ -33,7 +33,7 @@ data: "links": [ ], - "refresh": "10s", + "refresh": "60s", "rows": [ { "collapse": false, @@ -1112,7 +1112,7 @@ data: "schemaVersion": 14, "style": "dark", "tags": [ - + "prometheus-mixin" ], "templating": { "list": [ @@ -1220,7 +1220,7 @@ data: ] }, "timezone": "utc", - "title": "Prometheus Overview", + "title": "Prometheus / Overview", "uid": "", "version": 0 } diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml index 08d8cfd..07f5353 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml @@ -1203,7 +1203,7 @@ data: }, "datasource": "$datasource", - "hide": 2, + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, "includeAll": false, "label": null, "multi": false, diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards/etcd.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards/etcd.yaml index 835a690..157843a 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards/etcd.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/grafana/dashboards/etcd.yaml @@ -1,5 +1,5 @@ {{- /* -Generated from 'etcd' from https://raw.githubusercontent.com/etcd-io/etcd/master/Documentation/op-guide/grafana.json +Generated from 'etcd' from 
https://raw.githubusercontent.com/etcd-io/website/master/content/docs/current/op-guide/grafana.json Do not change in-place! In order to change this file first read following link: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml index 7b31ab3..249af77 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create }} +{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml index 4f1f616..31fd2de 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create }} +{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml index 37a19a5..f8afcb8 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled }} +{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} apiVersion: batch/v1 kind: Job metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml 
b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml index b74c618..b2d8912 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled }} +{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} apiVersion: batch/v1 kind: Job metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml index 98e002a..5834c48 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled }} +{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} apiVersion: policy/v1beta1 kind: PodSecurityPolicy metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml index 3609fe9..d229f76 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create }} +{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml index dcb0fbc..f4b1fbf 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.prometheusOperator.admissionWebhooks.enabled 
.Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create }} +{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml index 5296494..2048f04 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create }} +{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} apiVersion: v1 kind: ServiceAccount metadata: diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml index b2dacb0..b67df54 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml @@ -3,6 +3,11 @@ apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration metadata: name: {{ template "kube-prometheus-stack.fullname" . }}-admission +{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }} + annotations: + certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-root-cert" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }} + cert-manager.io/inject-ca-from: {{ printf "%s/%s-root-cert" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }} +{{- end }} labels: app: {{ template "kube-prometheus-stack.name" $ }}-admission {{- include "kube-prometheus-stack.labels" $ | indent 4 }} @@ -28,6 +33,9 @@ webhooks: namespace: {{ template "kube-prometheus-stack.namespace" . 
}} name: {{ template "kube-prometheus-stack.operator.fullname" $ }} path: /admission-prometheusrules/mutate + {{- if and .Values.prometheusOperator.admissionWebhooks.caBundle (not .Values.prometheusOperator.admissionWebhooks.patch.enabled) (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} + caBundle: {{ .Values.prometheusOperator.admissionWebhooks.caBundle }} + {{- end }} admissionReviewVersions: ["v1", "v1beta1"] sideEffects: None {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml index 3d48cd8..249488e 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml @@ -3,6 +3,11 @@ apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: name: {{ template "kube-prometheus-stack.fullname" . }}-admission +{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }} + annotations: + certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-root-cert" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }} + cert-manager.io/inject-ca-from: {{ printf "%s/%s-root-cert" .Release.Namespace (include "kube-prometheus-stack.fullname" .) | quote }} +{{- end }} labels: app: {{ template "kube-prometheus-stack.name" $ }}-admission {{- include "kube-prometheus-stack.labels" $ | indent 4 }} @@ -28,6 +33,9 @@ webhooks: namespace: {{ template "kube-prometheus-stack.namespace" . }} name: {{ template "kube-prometheus-stack.operator.fullname" $ }} path: /admission-prometheusrules/validate + {{- if and .Values.prometheusOperator.admissionWebhooks.caBundle (not .Values.prometheusOperator.admissionWebhooks.patch.enabled) (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} + caBundle: {{ .Values.prometheusOperator.admissionWebhooks.caBundle }} + {{- end }} admissionReviewVersions: ["v1", "v1beta1"] sideEffects: None {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml new file mode 100644 index 0000000..090e6a5 --- /dev/null +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml @@ -0,0 +1,57 @@ +{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled -}} +{{- if not .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef -}} +# Create a selfsigned Issuer, in order to create a root CA certificate for +# signing webhook serving certificates +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + selfSigned: {} +--- +# Generate a CA Certificate used to sign certificates for the webhook +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-root-cert + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + secretName: {{ template "kube-prometheus-stack.fullname" . 
}}-root-cert + duration: 43800h # 5y + issuerRef: + name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer + commonName: "ca.webhook.kube-prometheus-stack" + isCA: true +--- +# Create an Issuer that uses the above generated CA certificate to issue certs +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-root-issuer + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + ca: + secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert +{{- end }} +--- +# generate a serving certificate for the apiservices to use +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission + duration: 8760h # 1y + issuerRef: + {{- if .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef }} + {{- toYaml .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef | nindent 4 }} + {{- else }} + name: {{ template "kube-prometheus-stack.fullname" . }}-root-issuer + {{- end }} + dnsNames: + - {{ template "kube-prometheus-stack.operator.fullname" . }} + - {{ template "kube-prometheus-stack.operator.fullname" . }}.{{ template "kube-prometheus-stack.namespace" . }} + - {{ template "kube-prometheus-stack.operator.fullname" . }}.{{ template "kube-prometheus-stack.namespace" . }}.svc +{{- end -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/deployment.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/deployment.yaml index c991789..15b3684 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/deployment.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/deployment.yaml @@ -58,24 +58,18 @@ spec: {{- end }} - --namespaces={{ $ns | join "," }} {{- end }} - {{- if (semverCompare "< v0.44.0" .Values.prometheusOperator.image.tag) -}} - - --logtostderr=true - {{- end }} - --localhost=127.0.0.1 + {{- if .Values.prometheusOperator.prometheusDefaultBaseImage }} + - --prometheus-default-base-image={{ .Values.prometheusOperator.prometheusDefaultBaseImage }} + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerDefaultBaseImage }} + - --alertmanager-default-base-image={{ .Values.prometheusOperator.alertmanagerDefaultBaseImage }} + {{- end }} {{- if .Values.prometheusOperator.prometheusConfigReloaderImage.sha }} - --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }}@sha256:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.sha }} {{- else }} - --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }} {{- end }} - # Empty if statement to catch non-semver master tags that do not need the --config-reloader-image flag - {{- if regexMatch "master.*" .Values.prometheusOperator.image.tag -}} - {{- else if (semverCompare "< v0.43.0" .Values.prometheusOperator.image.tag) -}} - {{- if .Values.prometheusOperator.configmapReloadImage.sha }} - - --config-reloader-image={{ .Values.prometheusOperator.configmapReloadImage.repository }}:{{ .Values.prometheusOperator.configmapReloadImage.tag 
}}@sha256:{{ .Values.prometheusOperator.configmapReloadImage.sha }} - {{- else }} - - --config-reloader-image={{ .Values.prometheusOperator.configmapReloadImage.repository }}:{{ .Values.prometheusOperator.configmapReloadImage.tag }} - {{- end }} - {{- end }} - --config-reloader-cpu={{ .Values.prometheusOperator.configReloaderCpu }} - --config-reloader-memory={{ .Values.prometheusOperator.configReloaderMemory }} {{- if .Values.prometheusOperator.alertmanagerInstanceNamespaces }} @@ -90,14 +84,17 @@ spec: {{- if .Values.prometheusOperator.secretFieldSelector }} - --secret-field-selector={{ .Values.prometheusOperator.secretFieldSelector }} {{- end }} + {{- if .Values.prometheusOperator.clusterDomain }} + - --cluster-domain={{ .Values.prometheusOperator.clusterDomain }} + {{- end }} {{- if .Values.prometheusOperator.tls.enabled }} - --web.enable-tls=true - - --web.cert-file=cert/cert - - --web.key-file=cert/key - - --web.listen-address=:8443 + - --web.cert-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.crt{{ else }}cert{{ end }} + - --web.key-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.key{{ else }}key{{ end }} + - --web.listen-address=:{{ .Values.prometheusOperator.tls.internalPort }} - --web.tls-min-version={{ .Values.prometheusOperator.tls.tlsMinVersion }} ports: - - containerPort: 8443 + - containerPort: {{ .Values.prometheusOperator.tls.internalPort }} name: https {{- else }} ports: @@ -109,19 +106,21 @@ spec: securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true - {{- if .Values.prometheusOperator.tls.enabled }} +{{- if .Values.prometheusOperator.tls.enabled }} volumeMounts: - name: tls-secret mountPath: /cert readOnly: true - {{- end }} -{{- if .Values.prometheusOperator.tls.enabled }} volumes: - name: tls-secret secret: defaultMode: 420 secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission {{- end }} + {{- with .Values.prometheusOperator.dnsConfig }} + dnsConfig: +{{ toYaml . | indent 8 }} + {{- end }} {{- if .Values.prometheusOperator.securityContext }} securityContext: {{ toYaml .Values.prometheusOperator.securityContext | indent 8 }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml index 7524ddf..b7bd952 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml @@ -17,7 +17,7 @@ spec: ca: secret: name: {{ template "kube-prometheus-stack.fullname" . 
}}-admission - key: ca + key: {{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}ca.crt{{ else }}ca{{ end }} optional: false {{- else }} - port: http diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingress.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingress.yaml index 59bd4b6..4d45873 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingress.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingress.yaml @@ -1,4 +1,5 @@ {{- if and .Values.prometheus.enabled .Values.prometheus.ingress.enabled }} +{{- $pathType := .Values.prometheus.ingress.pathType | default "" }} {{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }} {{- $servicePort := .Values.prometheus.service.port -}} {{- $routePrefix := list .Values.prometheus.prometheusSpec.routePrefix }} @@ -23,7 +24,7 @@ metadata: {{ toYaml .Values.prometheus.ingress.labels | indent 4 }} {{- end }} spec: - {{- if or (.Capabilities.APIVersions.Has "networking.k8s.io/v1/IngressClass") (.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1/IngressClass") }} + {{- if or (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1") }} {{- if .Values.prometheus.ingress.ingressClassName }} ingressClassName: {{ .Values.prometheus.ingress.ingressClassName }} {{- end }} @@ -36,6 +37,9 @@ spec: paths: {{- range $p := $paths }} - path: {{ tpl $p $ }} + {{- if $pathType }} + pathType: {{ $pathType }} + {{- end }} backend: serviceName: {{ $serviceName }} servicePort: {{ $servicePort }} @@ -46,6 +50,9 @@ spec: paths: {{- range $p := $paths }} - path: {{ tpl $p $ }} + {{- if $pathType }} + pathType: {{ $pathType }} + {{- end }} backend: serviceName: {{ $serviceName }} servicePort: {{ $servicePort }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingressThanosSidecar.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingressThanosSidecar.yaml index 45f9cb2..69de0f6 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingressThanosSidecar.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingressThanosSidecar.yaml @@ -1,9 +1,14 @@ {{- if and .Values.prometheus.enabled .Values.prometheus.thanosIngress.enabled }} +{{- $pathType := .Values.prometheus.thanosIngress.pathType | default "" }} {{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) 
"prometheus" }} {{- $thanosPort := .Values.prometheus.thanosIngress.servicePort -}} {{- $routePrefix := list .Values.prometheus.prometheusSpec.routePrefix }} {{- $paths := .Values.prometheus.thanosIngress.paths | default $routePrefix -}} +{{- if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" }} +apiVersion: networking.k8s.io/v1beta1 +{{ else }} apiVersion: extensions/v1beta1 +{{ end -}} kind: Ingress metadata: {{- if .Values.prometheus.thanosIngress.annotations }} @@ -18,7 +23,7 @@ metadata: {{ toYaml .Values.prometheus.thanosIngress.labels | indent 4 }} {{- end }} spec: - {{- if or (.Capabilities.APIVersions.Has "networking.k8s.io/v1/IngressClass") (.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1/IngressClass") }} + {{- if or (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1") }} {{- if .Values.prometheus.thanosIngress.ingressClassName }} ingressClassName: {{ .Values.prometheus.thanosIngress.ingressClassName }} {{- end }} @@ -31,6 +36,9 @@ spec: paths: {{- range $p := $paths }} - path: {{ tpl $p $ }} + {{- if $pathType }} + pathType: {{ $pathType }} + {{- end }} backend: serviceName: {{ $serviceName }} servicePort: {{ $thanosPort }} @@ -41,6 +49,9 @@ spec: paths: {{- range $p := $paths }} - path: {{ tpl $p $ }} + {{- if $pathType }} + pathType: {{ $pathType }} + {{- end }} backend: serviceName: {{ $serviceName }} servicePort: {{ $thanosPort }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingressperreplica.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingressperreplica.yaml index c1959c6..3314377 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingressperreplica.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/ingressperreplica.yaml @@ -1,4 +1,5 @@ {{- if and .Values.prometheus.enabled .Values.prometheus.servicePerReplica.enabled .Values.prometheus.ingressPerReplica.enabled }} +{{- $pathType := .Values.prometheus.ingressPerReplica.pathType | default "" }} {{- $count := .Values.prometheus.prometheusSpec.replicas | int -}} {{- $servicePort := .Values.prometheus.servicePerReplica.port -}} {{- $ingressValues := .Values.prometheus.ingressPerReplica -}} @@ -29,7 +30,7 @@ items: {{ toYaml $ingressValues.annotations | indent 8 }} {{- end }} spec: - {{- if or ($.Capabilities.APIVersions.Has "networking.k8s.io/v1/IngressClass") ($.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1/IngressClass") }} + {{- if or ($.Capabilities.APIVersions.Has "networking.k8s.io/v1") ($.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1") }} {{- if $ingressValues.ingressClassName }} ingressClassName: {{ $ingressValues.ingressClassName }} {{- end }} @@ -40,6 +41,9 @@ items: paths: {{- range $p := $ingressValues.paths }} - path: {{ tpl $p $ }} + {{- if $pathType }} + pathType: {{ $pathType }} + {{- end }} backend: serviceName: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-{{ $i }} servicePort: {{ $servicePort }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml index 66ac5a8..eb561e6 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml @@ -40,7 +40,7 @@ spec: {{- end }} {{- if 
.Values.prometheus.prometheusSpec.externalLabels }} externalLabels: -{{ toYaml .Values.prometheus.prometheusSpec.externalLabels | indent 4}} +{{ tpl (toYaml .Values.prometheus.prometheusSpec.externalLabels | indent 4) . }} {{- end }} {{- if .Values.prometheus.prometheusSpec.prometheusExternalLabelNameClear }} prometheusExternalLabelName: "" @@ -65,6 +65,7 @@ spec: {{- end }} paused: {{ .Values.prometheus.prometheusSpec.paused }} replicas: {{ .Values.prometheus.prometheusSpec.replicas }} + shards: {{ .Values.prometheus.prometheusSpec.shards }} logLevel: {{ .Values.prometheus.prometheusSpec.logLevel }} logFormat: {{ .Values.prometheus.prometheusSpec.logFormat }} listenLocal: {{ .Values.prometheus.prometheusSpec.listenLocal }} @@ -200,9 +201,9 @@ spec: requiredDuringSchedulingIgnoredDuringExecution: - topologyKey: {{ .Values.prometheus.prometheusSpec.podAntiAffinityTopologyKey }} labelSelector: - matchLabels: - app: prometheus - prometheus: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + matchExpressions: + - {key: app, operator: In, values: [prometheus]} + - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-prometheus]} {{- else if eq .Values.prometheus.prometheusSpec.podAntiAffinity "soft" }} podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: @@ -210,15 +211,19 @@ spec: podAffinityTerm: topologyKey: {{ .Values.prometheus.prometheusSpec.podAntiAffinityTopologyKey }} labelSelector: - matchLabels: - app: prometheus - prometheus: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + matchExpressions: + - {key: app, operator: In, values: [prometheus]} + - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-prometheus]} {{- end }} {{- end }} {{- if .Values.prometheus.prometheusSpec.tolerations }} tolerations: {{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }} {{- end }} +{{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.prometheus.prometheusSpec.topologySpreadConstraints | indent 4 }} +{{- end }} {{- if .Values.global.imagePullSecrets }} imagePullSecrets: {{ toYaml .Values.global.imagePullSecrets | indent 4 }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/psp.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/psp.yaml index bd2b270..08da5e1 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/psp.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/psp.yaml @@ -26,6 +26,9 @@ spec: - 'secret' - 'downwardAPI' - 'persistentVolumeClaim' +{{- if .Values.prometheus.podSecurityPolicy.volumes }} +{{ toYaml .Values.prometheus.podSecurityPolicy.volumes | indent 4 }} +{{- end }} hostNetwork: false hostIPC: false hostPID: false @@ -52,4 +55,8 @@ spec: allowedCapabilities: {{ toYaml .Values.prometheus.podSecurityPolicy.allowedCapabilities | indent 4 }} {{- end }} +{{- if .Values.prometheus.podSecurityPolicy.allowedHostPaths }} + allowedHostPaths: +{{ toYaml .Values.prometheus.podSecurityPolicy.allowedHostPaths | indent 4 }} +{{- end }} {{- end }} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml index 8358704..2a46523 100644 --- 
a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml @@ -1,5 +1,5 @@ {{- /* -Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/etcd/master/Documentation/op-guide/etcd3_alert.rules.yml +Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/website/master/content/docs/v3.4.0/op-guide/etcd3_alert.rules.yml Do not change in-place! In order to change this file first read following link: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} @@ -24,24 +24,6 @@ spec: groups: - name: etcd rules: - - alert: etcdMembersDown - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).' - expr: |- - max by (job) ( - sum by (job) (up{job=~".*etcd.*"} == bool 0) - or - count by (job,endpoint) ( - sum by (job,endpoint,To) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[3m])) > 0.01 - ) - ) - > 0 - for: 3m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - alert: etcdInsufficientMembers annotations: message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).' @@ -64,9 +46,9 @@ spec: {{- end }} - alert: etcdHighNumberOfLeaderChanges annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' - expr: increase((max by (job) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 3 - for: 5m + message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": instance {{`{{`}} $labels.instance {{`}}`}} has seen {{`{{`}} $value {{`}}`}} leader changes within the last hour.' + expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3 + for: 15m labels: severity: warning {{- if .Values.defaultRules.additionalRuleLabels }} @@ -98,7 +80,7 @@ spec: {{- end }} - alert: etcdHighNumberOfFailedProposals annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last 30 minutes on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last hour on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' 
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 for: 15m labels: @@ -166,4 +148,4 @@ spec: {{- if .Values.defaultRules.additionalRuleLabels }} {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} {{- end }} -{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.yaml index 35e4ede..19511e8 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.yaml @@ -1,5 +1,5 @@ {{- /* -Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml +Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Do not change in-place! In order to change this file first read following link: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} @@ -24,8 +24,6 @@ spec: groups: - name: k8s.rules rules: - - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace) - record: namespace:container_cpu_usage_seconds_total:sum_rate - expr: |- sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]) @@ -57,8 +55,6 @@ spec: max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_swap - - expr: sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace) - record: namespace:container_memory_usage_bytes:sum - expr: |- sum by (namespace) ( sum by (namespace, pod) ( diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml index 78a09e3..7b00b54 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml @@ -1,5 +1,5 @@ {{- /* -Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml +Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Do not change in-place! 
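A note on the {{`{{`}} ... {{`}}`}} escaping that recurs throughout the dashboard and rule templates in this patch: Grafana panels and Prometheus alert annotations use the same {{ }} placeholder syntax as Helm, so the chart wraps each literal brace pair in a backtick string to keep Helm from evaluating it. A minimal before/after sketch (the legendFormat line is taken verbatim from the remote-write dashboard above):

    # In the chart template, the placeholder is escaped:
    "legendFormat": "{{`{{`}}instance{{`}}`}}",
    # After `helm template` renders the chart, Grafana receives:
    "legendFormat": "{{instance}}",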
In order to change this file first read following link: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} @@ -99,54 +99,56 @@ spec: labels: verb: write record: apiserver_request:availability30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d])) - 
record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d])) + - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 record: code_verb:apiserver_request_total:increase30d + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) 
(increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h - expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) labels: verb: read diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml index da0de91..0f44ccc 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml @@ -1,5 +1,5 @@ {{- /* -Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml +Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Do not change in-place! In order to change this file first read following link: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml index b4d1a0f..eddc1e4 100644 --- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml +++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml @@ -1,5 +1,5 @@ {{- /* -Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml +Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml Do not change in-place! 
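The kube-apiserver-availability rewrite above replaces two dozen direct 30-day increase() rules with per-hour recording rules plus a single roll-up. The arithmetic: a 30-day window spans 24 * 30 = 720 hours, so averaging the hourly increase series over 30d and scaling by 720 approximates the old increase(...[30d]) at a fraction of the query cost. The roll-up rule, copied from the hunk above:

    # Reconstruct the 30d total from the cheap 1h recording rule:
    # average hourly increase over 30d, scaled by the 720 hours in the window.
    - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
      record: code_verb:apiserver_request_total:increase30d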
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
index da0de91..0f44ccc 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
index b4d1a0f..eddc1e4 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
@@ -343,11 +343,6 @@ spec:
         quantile: '0.99'
         verb: write
       record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
-    - expr: |-
-        sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
-        /
-        sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
-      record: cluster:apiserver_request_duration_seconds:mean5m
     - expr: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
       labels:
         quantile: '0.99'
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
index 594f1bb..24df268 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml
index f9bd1cc..8712b9f 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
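Reviewer note: the cluster:apiserver_request_duration_seconds:mean5m recording rule is dropped upstream in the kube-apiserver.rules hunk above. If a deployment still depends on it, it could be re-added outside the vendored chart, for example via the chart's additionalPrometheusRules value (assuming that value is supported in this chart version); the expression below is copied from the removed lines:

    additionalPrometheusRules:
    - name: apiserver-mean-latency
      groups:
      - name: apiserver-mean-latency.rules
        rules:
        - record: cluster:apiserver_request_duration_seconds:mean5m
          expr: |-
            sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
            /
            sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)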
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml
index 6eb5e02..198bbb8 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
@@ -27,10 +27,10 @@ spec:
     rules:
     - alert: KubePodCrashLooping
       annotations:
-        description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes.
+        description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 10 minutes.
         runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
         summary: Pod is crash looping.
-      expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m]) * 60 * 5 > 0
+      expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m]) * 60 * 5 > 0
       for: 15m
       labels:
         severity: warning
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml
index 87933e5..898f8ee 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
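Reviewer note: in the KubePodCrashLooping change above, the rate() window widens from 5m to 10m but the multiplier stays * 60 * 5, so the value rendered in the description is still a restarts-per-5-minutes figure, now smoothed over a 10-minute window. A sketch of the scaling (illustration only, not part of the patch):

    # rate(...[10m])  -> average restarts per second over the last 10 minutes
    # * 60 * 5        -> scaled to restarts per 5 minutes
    - alert: KubePodCrashLooping
      expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[10m]) * 60 * 5 > 0
      for: 15m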
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml
index 1fadb59..527e6e3 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
index f9acb6a..2ed298b 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
@@ -48,16 +48,15 @@ spec:
 {{- end }}
     - alert: AggregatedAPIErrors
      annotations:
-        description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
+        description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
         runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-aggregatedapierrors
         summary: An aggregated API has reported errors.
-      expr: sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
+      expr: sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[10m])) > 4
       labels:
         severity: warning
 {{- if .Values.defaultRules.additionalRuleLabels }}
 {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
 {{- end }}
-{{- if semverCompare ">=1.18.0-0" $kubeTargetVersion }}
     - alert: AggregatedAPIDown
       annotations:
         description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
@@ -67,7 +66,6 @@ spec:
       for: 5m
       labels:
         severity: warning
-{{- end }}
 {{- if .Values.defaultRules.additionalRuleLabels }}
 {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
 {{- end }}
@@ -85,4 +83,16 @@ spec:
 {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
 {{- end }}
 {{- end }}
+    - alert: KubeAPITerminatedRequests
+      annotations:
+        description: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapiterminatedrequests
+        summary: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
+      expr: sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
+      for: 5m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
 {{- end }}
+{{- end }}
\ No newline at end of file
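Reviewer note: the new KubeAPITerminatedRequests alert fires on the termination share

    terminated / (served + terminated) > 0.20

i.e. it triggers when more than 20% of all requests arriving over the last 10 minutes were terminated by the apiserver in self-defense; apiserver_request_total only counts requests that were actually handled, hence the extra term in the denominator.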
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
index bc9dab8..3d1ace1 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
index cde9da4..4d536ec 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
index a2c1272..098f6fb 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml
index 3aca2e6..52230c6 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml
index b6ae1bb..ddb7376 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/node-exporter-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml
index 7d5ab7d..3be497c 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/node-exporter-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
@@ -27,7 +27,6 @@ spec:
     - alert: NodeFilesystemSpaceFillingUp
       annotations:
         description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
         summary: Filesystem is predicted to run out of space within the next 24 hours.
       expr: |-
         (
@@ -46,7 +45,6 @@ spec:
     - alert: NodeFilesystemSpaceFillingUp
       annotations:
         description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
         summary: Filesystem is predicted to run out of space within the next 4 hours.
       expr: |-
         (
@@ -65,7 +63,6 @@ spec:
     - alert: NodeFilesystemAlmostOutOfSpace
       annotations:
         description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
         summary: Filesystem has less than 5% space left.
       expr: |-
         (
@@ -82,7 +79,6 @@ spec:
     - alert: NodeFilesystemAlmostOutOfSpace
       annotations:
         description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
         summary: Filesystem has less than 3% space left.
       expr: |-
         (
@@ -99,7 +95,6 @@ spec:
     - alert: NodeFilesystemFilesFillingUp
       annotations:
         description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
         summary: Filesystem is predicted to run out of inodes within the next 24 hours.
       expr: |-
         (
@@ -118,7 +113,6 @@ spec:
     - alert: NodeFilesystemFilesFillingUp
       annotations:
         description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
         summary: Filesystem is predicted to run out of inodes within the next 4 hours.
       expr: |-
         (
@@ -137,7 +131,6 @@ spec:
     - alert: NodeFilesystemAlmostOutOfFiles
       annotations:
         description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
         summary: Filesystem has less than 5% inodes left.
       expr: |-
         (
@@ -154,7 +147,6 @@ spec:
     - alert: NodeFilesystemAlmostOutOfFiles
       annotations:
         description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
         summary: Filesystem has less than 3% inodes left.
       expr: |-
         (
@@ -171,7 +163,6 @@ spec:
     - alert: NodeNetworkReceiveErrs
       annotations:
         description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.'
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworkreceiveerrs
         summary: Network interface is reporting many receive errors.
       expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
       for: 1h
@@ -183,7 +174,6 @@ spec:
     - alert: NodeNetworkTransmitErrs
       annotations:
         description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.'
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworktransmiterrs
         summary: Network interface is reporting many transmit errors.
       expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
       for: 1h
@@ -195,7 +185,6 @@ spec:
     - alert: NodeHighNumberConntrackEntriesUsed
       annotations:
         description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.'
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodehighnumberconntrackentriesused
         summary: Number of conntrack are getting close to the limit.
       expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
       labels:
@@ -206,7 +195,6 @@ spec:
     - alert: NodeTextFileCollectorScrapeError
       annotations:
         description: Node Exporter text file collector failed to scrape.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodetextfilecollectorscrapeerror
         summary: Node Exporter text file collector failed to scrape.
       expr: node_textfile_scrape_error{job="node-exporter"} == 1
       labels:
@@ -217,7 +205,6 @@ spec:
     - alert: NodeClockSkewDetected
       annotations:
         message: Clock on {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclockskewdetected
         summary: Clock skew detected.
       expr: |-
         (
@@ -240,7 +227,6 @@ spec:
     - alert: NodeClockNotSynchronising
       annotations:
         message: Clock on {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclocknotsynchronising
         summary: Clock not synchronising.
       expr: |-
         min_over_time(node_timex_sync_status[5m]) == 0
@@ -255,7 +241,6 @@ spec:
     - alert: NodeRAIDDegraded
       annotations:
         description: RAID array '{{`{{`}} $labels.device {{`}}`}}' on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-noderaiddegraded
         summary: RAID Array is degraded
       expr: node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
       for: 15m
@@ -267,7 +252,6 @@ spec:
     - alert: NodeRAIDDiskFailure
       annotations:
         description: At least one device in RAID array on {{`{{`}} $labels.instance {{`}}`}} failed. Array '{{`{{`}} $labels.device {{`}}`}}' needs attention and possibly a disk swap.
-        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-noderaiddiskfailure
         summary: Failed device in RAID array
       expr: node_md_disks{state="fail"} > 0
       labels:
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml
index 7253b31..c841e6f 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
@@ -24,8 +24,6 @@ spec:
   groups:
   - name: node.rules
     rules:
-    - expr: sum(min(kube_pod_info{node!=""}) by (cluster, node))
-      record: ':kube_pod_info_node_count:'
     - expr: |-
         topk by(namespace, pod) (1,
           max by (node, namespace, pod) (
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules/etcd.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules/etcd.yaml
index e9c4f6c..28cc925 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules/etcd.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/rules/etcd.yaml
@@ -1,5 +1,5 @@
 {{- /*
-Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/etcd/master/Documentation/op-guide/etcd3_alert.rules.yml
+Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/website/master/content/docs/v3.4.0/op-guide/etcd3_alert.rules.yml
 Do not change in-place! In order to change this file first read following link:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
@@ -24,24 +24,6 @@ spec:
   groups:
   - name: etcd
     rules:
-    - alert: etcdMembersDown
-      annotations:
-        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).'
-      expr: |-
-        max by (job) (
-          sum by (job) (up{job=~".*etcd.*"} == bool 0)
-        or
-          count by (job,endpoint) (
-            sum by (job,endpoint,To) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[3m])) > 0.01
-          )
-        )
-        > 0
-      for: 3m
-      labels:
-        severity: critical
-{{- if .Values.defaultRules.additionalRuleLabels }}
-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
-{{- end }}
     - alert: etcdInsufficientMembers
       annotations:
         message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
@@ -64,9 +46,9 @@ spec:
 {{- end }}
     - alert: etcdHighNumberOfLeaderChanges
       annotations:
-        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
-      expr: increase((max by (job) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 3
-      for: 5m
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": instance {{`{{`}} $labels.instance {{`}}`}} has seen {{`{{`}} $value {{`}}`}} leader changes within the last hour.'
+      expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3
+      for: 15m
       labels:
         severity: warning
 {{- if .Values.defaultRules.additionalRuleLabels }}
@@ -126,7 +108,7 @@ spec:
 {{- end }}
     - alert: etcdHighNumberOfFailedProposals
       annotations:
-        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last 30 minutes on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last hour on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
       expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
       for: 15m
       labels:
@@ -194,4 +176,4 @@ spec:
 {{- if .Values.defaultRules.additionalRuleLabels }}
 {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
 {{- end }}
-{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/service.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/service.yaml
index cb831c7..8676b81 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/service.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/service.yaml
@@ -39,6 +39,14 @@ spec:
     {{- end }}
     port: {{ .Values.prometheus.service.port }}
     targetPort: {{ .Values.prometheus.service.targetPort }}
+  {{- if .Values.prometheus.thanosIngress.enabled }}
+  - name: grpc
+    {{- if eq .Values.prometheus.service.type "NodePort" }}
+    nodePort: {{ .Values.prometheus.thanosIngress.nodePort }}
+    {{- end }}
+    port: {{ .Values.prometheus.thanosIngress.servicePort }}
+    targetPort: {{ .Values.prometheus.thanosIngress.servicePort }}
+  {{- end }}
  {{- if .Values.prometheus.service.additionalPorts }}
{{ toYaml .Values.prometheus.service.additionalPorts | indent 2 }}
  {{- end }}
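Reviewer note: the grpc port added above and the headless thanos-discovery Service in the new-file diff below exist so an external Thanos Query can find every Prometheus pod's sidecar via DNS SRV records. A minimal sketch of the consuming side, with <fullname> and <namespace> as placeholders for the actual release fullname and namespace (see also the thanosService comment in values.yaml further down):

    # Illustrative Thanos Query container args -- placeholders must be replaced.
    args:
    - query
    - --store=dnssrv+_grpc._tcp.<fullname>-thanos-discovery.<namespace>.svc.cluster.local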
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSIdecar.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSIdecar.yaml
new file mode 100644
index 0000000..6ae1b14
--- /dev/null
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/serviceThanosSIdecar.yaml
@@ -0,0 +1,27 @@
+{{- if and .Values.prometheus.enabled .Values.prometheus.thanosService.enabled }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "kube-prometheus-stack.fullname" . }}-thanos-discovery
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}-thanos-discovery
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.prometheus.thanosService.labels }}
+{{ toYaml .Values.prometheus.thanosService.labels | indent 4 }}
+{{- end }}
+{{- if .Values.prometheus.thanosService.annotations }}
+  annotations:
+{{ toYaml .Values.prometheus.thanosService.annotations | indent 4 }}
+{{- end }}
+spec:
+  type: ClusterIP
+  clusterIP: None
+  ports:
+  - name: {{ .Values.prometheus.thanosService.portName }}
+    port: {{ .Values.prometheus.thanosService.port }}
+    targetPort: {{ .Values.prometheus.thanosService.targetPort }}
+  selector:
+    app: prometheus
+    prometheus: {{ template "kube-prometheus-stack.fullname" . }}-prometheus
+{{- end }}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/servicemonitors.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/servicemonitors.yaml
index 959df21..a78d1cd 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/servicemonitors.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/templates/prometheus/servicemonitors.yaml
@@ -30,5 +30,9 @@ items:
       targetLabels:
{{ toYaml .targetLabels | indent 8 }}
       {{- end }}
+      {{- if .podTargetLabels }}
+      podTargetLabels:
+{{ toYaml .podTargetLabels | indent 8 }}
+      {{- end }}
   {{- end }}
 {{- end }}
diff --git a/charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml b/charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml
index 3aa810f..66114e2 100644
--- a/charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml
+++ b/charts/kubezero-metrics/charts/kube-prometheus-stack/values.yaml
@@ -158,6 +158,8 @@ alertmanager:
       receiver: 'null'
     receivers:
     - name: 'null'
+    templates:
+    - '/etc/alertmanager/config/*.tmpl'
 
   ## Pass the Alertmanager configuration directives through Helm's templating
   ## engine. If the Alertmanager configuration contains Alertmanager templates,
@@ -170,6 +172,10 @@ alertmanager:
   tplConfig: false
 
   ## Alertmanager template files to format alerts
+  ## By default, templateFiles are placed in /etc/alertmanager/config/ and if
+  ## they have a .tmpl file suffix will be loaded. See config.templates above
+  ## to change, add other suffixes. If adding other suffixes, be sure to update
+  ## config.templates above to include those suffixes.
  ## ref: https://prometheus.io/docs/alerting/notifications/
  ##      https://prometheus.io/docs/alerting/notification_examples/
  ##
@@ -214,6 +220,10 @@ alertmanager:
    paths: []
    # - /
 
+    ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched)
+    ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types
+    # pathType: ImplementationSpecific
+
    ## TLS configuration for Alertmanager Ingress
    ## Secret must be manually created in the namespace
    ##
@@ -254,6 +264,10 @@ alertmanager:
      paths: []
      # - /
 
+      ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched)
+      ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types
+      # pathType: ImplementationSpecific
+
      ## Secret name containing the TLS certificate for alertmanager per replica ingress
      ## Secret must be manually created in the namespace
      tlsSecretName: ""
@@ -520,6 +534,17 @@ alertmanager:
    #      value: "value"
    #      effect: "NoSchedule"
 
+    ## If specified, the pod's topology spread constraints.
+    ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/
+    ##
+    topologySpreadConstraints: []
+    # - maxSkew: 1
+    #   topologyKey: topology.kubernetes.io/zone
+    #   whenUnsatisfiable: DoNotSchedule
+    #   labelSelector:
+    #     matchLabels:
+    #       app: alertmanager
+
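Reviewer note: the templates stanza added to the Alertmanager config above makes Alertmanager load *.tmpl files from /etc/alertmanager/config/, which is where the chart mounts entries from templateFiles. A minimal sketch of shipping a custom notification template that way (file name and template content are illustrative):

    alertmanager:
      templateFiles:
        custom.tmpl: |-
          {{ define "slack.custom.title" }}[{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}{{ end }}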
    ## SecurityContext holds pod-level security attributes and common container settings.
    ## This defaults to non root user with uid 1000 and gid 2000. *v1.PodSecurityContext  false
    ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
@@ -628,6 +653,9 @@ grafana:
     enabled: true
     defaultDatasourceEnabled: true
 
+    # If not defined, will use prometheus.prometheusSpec.scrapeInterval or its default
+    # defaultDatasourceScrapeInterval: 15s
+
     ## Annotations for Grafana datasource configmaps
     ##
     annotations: {}
@@ -1145,6 +1173,9 @@ kubeStateMetrics:
     ## Scrape interval. If not set, the Prometheus default scrape interval is used.
     ##
     interval: ""
+    ## Override serviceMonitor selector
+    ##
+    selectorOverride: {}
 
     ## metric relabel configs to apply to samples before ingestion.
     ##
@@ -1218,25 +1249,31 @@ prometheus-node-exporter:
   ##
   jobLabel: node-exporter
   extraArgs:
-    - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
-    - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
+    - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
+    - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
 
 ## Manages Prometheus and Alertmanager components
 ##
 prometheusOperator:
   enabled: true
 
-  # Prometheus-Operator v0.39.0 and later support TLS natively.
+  ## Prometheus-Operator v0.39.0 and later support TLS natively.
+  ##
   tls:
     enabled: true
     # Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants
     tlsMinVersion: VersionTLS13
+    # The default webhook port is 10250 in order to work out-of-the-box in GKE private clusters and avoid adding firewall rules.
+    internalPort: 10250
 
   ## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
  ## rules from making their way into prometheus and potentially preventing the container from starting
  admissionWebhooks:
    failurePolicy: Fail
    enabled: true
+    ## A PEM encoded CA bundle which will be used to validate the webhook's server certificate.
+    ## If unspecified, system trust roots on the apiserver are used.
+    caBundle: ""
    ## If enabled, generate a self-signed certificate, then patch the webhook configurations with the generated data.
    ## On chart upgrades (or if the secret exists) the cert will not be re-generated. You can use this to provide your own
    ## certs ahead of time if you wish.
@@ -1256,6 +1293,12 @@ prometheusOperator:
      nodeSelector: {}
      affinity: {}
      tolerations: []
+    # Use certmanager to generate webhook certs
+    certManager:
+      enabled: false
+      # issuerRef:
+      #   name: "issuer"
+      #   kind: "ClusterIssuer"
 
  ## Namespaces to scope the interaction of the Prometheus Operator and the apiserver (allow list).
  ## This is mutually exclusive with denyNamespaces. Setting this to an empty object will disable the configuration
@@ -1275,6 +1318,12 @@ prometheusOperator:
  prometheusInstanceNamespaces: []
  thanosRulerInstanceNamespaces: []
 
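Reviewer note: with the certManager block added above, webhook certificates can come from cert-manager instead of the self-signed patch job (see the new certmanager.yaml template in this patch). A minimal values sketch, assuming an existing ClusterIssuer named my-cluster-issuer (a placeholder name):

    prometheusOperator:
      admissionWebhooks:
        certManager:
          enabled: true
          issuerRef:
            name: my-cluster-issuer   # placeholder -- any existing (Cluster)Issuer
            kind: ClusterIssuer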
+  ## The clusterDomain value will be added to the cluster.peer option of the alertmanager.
+  ## Without this specified option cluster.peer will have value alertmanager-monitoring-alertmanager-0.alertmanager-operated:9094 (default value)
+  ## With this specified option cluster.peer will have value alertmanager-monitoring-alertmanager-0.alertmanager-operated.namespace.svc.cluster-domain:9094
+  ##
+  # clusterDomain: "cluster.local"
+
   ## Service account for Alertmanager to use.
   ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
   ##
@@ -1411,7 +1460,16 @@ prometheusOperator:
   #            values:
   #            - e2e-az1
   #            - e2e-az2
-
+  dnsConfig: {}
+    # nameservers:
+    #   - 1.2.3.4
+    # searches:
+    #   - ns1.svc.cluster-domain.example
+    #   - my.dns.search.suffix
+    # options:
+    #   - name: ndots
+    #     value: "2"
+    #   - name: edns0
   securityContext:
     fsGroup: 65534
     runAsGroup: 65534
@@ -1422,22 +1480,23 @@ prometheusOperator:
   ##
   image:
     repository: quay.io/prometheus-operator/prometheus-operator
-    tag: v0.44.0
+    tag: v0.45.0
     sha: ""
     pullPolicy: IfNotPresent
 
-  ## Configmap-reload image to use for reloading configmaps
+  ## Prometheus image to use for prometheuses managed by the operator
   ##
-  configmapReloadImage:
-    repository: docker.io/jimmidyson/configmap-reload
-    tag: v0.4.0
-    sha: ""
+  # prometheusDefaultBaseImage: quay.io/prometheus/prometheus
+
+  ## Alertmanager image to use for alertmanagers managed by the operator
+  ##
+  # alertmanagerDefaultBaseImage: quay.io/prometheus/alertmanager
 
   ## Prometheus-config-reloader image to use for config and rule reloading
   ##
   prometheusConfigReloaderImage:
     repository: quay.io/prometheus-operator/prometheus-config-reloader
-    tag: v0.44.0
+    tag: v0.45.0
     sha: ""
 
   ## Set the prometheus config reloader side-car CPU limit
@@ -1446,7 +1505,7 @@ prometheusOperator:
 
   ## Set the prometheus config reloader side-car memory limit
   ##
-  configReloaderMemory: 25Mi
+  configReloaderMemory: 50Mi
 
   ## Set a Field Selector to filter watched secrets
   ##
@@ -1469,6 +1528,19 @@ prometheus:
     create: true
     name: ""
 
+  # Service for thanos service discovery on sidecar
+  # Enable this can make Thanos Query can use
+  # `--store=dnssrv+_grpc._tcp.${kube-prometheus-stack.fullname}-thanos-discovery.${namespace}.svc.cluster.local` to discovery
+  # Thanos sidecar on prometheus nodes
+  # (Please remember to change ${kube-prometheus-stack.fullname} and ${namespace}. Not just copy and paste!)
+  thanosService:
+    enabled: false
+    annotations: {}
+    labels: {}
+    portName: grpc
+    port: 10901
+    targetPort: "grpc"
+
   ## Configuration for Prometheus service
   ##
   service:
@@ -1538,7 +1610,7 @@ prometheus:
     minAvailable: 1
     maxUnavailable: ""
 
-  # Ingress exposes thanos sidecar outside the clsuter
+  # Ingress exposes thanos sidecar outside the cluster
   thanosIngress:
     enabled: false
 
    annotations: {}
    labels: {}
    servicePort: 10901
+
+    ## Port to expose on each node
+    ## Only used if service.type is 'NodePort'
+    ##
+    nodePort: 30901
+
    ## Hosts must be provided if Ingress is enabled.
    ##
    hosts: []
@@ -1559,7 +1637,11 @@ prometheus:
    paths: []
    # - /
 
-    ## TLS configuration for Alertmanager Ingress
+    ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched)
+    ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types
+    # pathType: ImplementationSpecific
+
+    ## TLS configuration for Thanos Ingress
    ## Secret must be manually created in the namespace
    ##
    tls: []
@@ -1589,6 +1671,10 @@ prometheus:
      paths: []
      # - /
 
+      ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched)
+      ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types
+      # pathType: ImplementationSpecific
+
    ## TLS configuration for Prometheus Ingress
    ## Secret must be manually created in the namespace
    ##
@@ -1624,6 +1710,10 @@ prometheus:
      paths: []
      # - /
 
+      ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched)
+      ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types
+      # pathType: ImplementationSpecific
+
      ## Secret name containing the TLS certificate for Prometheus per replica ingress
      ## Secret must be manually created in the namespace
      tlsSecretName: ""
@@ -1641,6 +1731,8 @@ prometheus:
  ##  ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
  podSecurityPolicy:
    allowedCapabilities: []
+    allowedHostPaths: []
+    volumes: []
 
  serviceMonitor:
    ## Scrape interval. If not set, the Prometheus default scrape interval is used.
@@ -1714,7 +1806,7 @@ prometheus:
    ##
    image:
      repository: quay.io/prometheus/prometheus
-      tag: v2.22.1
+      tag: v2.24.0
      sha: ""
 
    ## Tolerations for use with node taints
@@ -1726,6 +1818,17 @@ prometheus:
    #    value: "value"
    #    effect: "NoSchedule"
 
+    ## If specified, the pod's topology spread constraints.
+    ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/
+    ##
+    topologySpreadConstraints: []
+    # - maxSkew: 1
+    #   topologyKey: topology.kubernetes.io/zone
+    #   whenUnsatisfiable: DoNotSchedule
+    #   labelSelector:
+    #     matchLabels:
+    #       app: prometheus
+
    ## Alertmanagers to which alerts will be sent
    ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints
    ##
@@ -1834,9 +1937,12 @@ prometheus:
    #    prometheus: somelabel
 
    ## Namespaces to be selected for ServiceMonitor discovery.
-    ## See https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#namespaceselector for usage
    ##
    serviceMonitorNamespaceSelector: {}
+    ## Example which selects ServiceMonitors in namespaces with label "prometheus" set to "somelabel"
+    # serviceMonitorNamespaceSelector:
+    #   matchLabels:
+    #     prometheus: somelabel
 
    ## If true, a nil or {} value for prometheus.prometheusSpec.podMonitorSelector will cause the
    ## prometheus resource to be created with selectors based on values in the helm deployment,
@@ -1894,10 +2000,20 @@ prometheus:
    ##
    paused: false
 
-    ## Number of Prometheus replicas desired
+    ## Number of replicas of each shard to deploy for a Prometheus deployment.
+    ## Number of replicas multiplied by shards is the total number of Pods created.
    ##
    replicas: 1
 
+    ## EXPERIMENTAL: Number of shards to distribute targets onto.
+    ## Number of replicas multiplied by shards is the total number of Pods created.
+    ## Note that scaling down shards will not reshard data onto remaining instances, it must be manually moved.
+    ## Increasing shards will not reshard data either but it will continue to be available from the same instances.
+    ## To query globally use Thanos sidecar and Thanos querier or remote write data to a central location.
+    ## Sharding is done on the content of the `__address__` target meta-label.
+    ##
+    shards: 1
+
    ## Log level for Prometheus be configured in
    ##
    logLevel: info
@@ -2160,7 +2276,11 @@ prometheus:
 
      ## labels to transfer from the kubernetes service to the target
      ##
-      # targetLabels: ""
+      # targetLabels: []
+
+      ## labels to transfer from the kubernetes pods to the target
+      ##
+      # podTargetLabels: []
 
      ## Label selector for services to which this ServiceMonitor applies
      ##
diff --git a/charts/kubezero-metrics/update.sh b/charts/kubezero-metrics/update.sh
index c757053..4be6f0f 100755
--- a/charts/kubezero-metrics/update.sh
+++ b/charts/kubezero-metrics/update.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
 
-VERSION=12.8.0
+VERSION=13.13.0
 
 rm -rf charts/kube-prometheus-stack
 curl -L -s -o - https://github.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-${VERSION}/kube-prometheus-stack-${VERSION}.tgz | tar xfz - -C charts
 
-patch -p3 -i remove_etcd_grpc_alerts.patch
+patch -p3 -i remove_etcd_grpc_alerts.patch --no-backup-if-mismatch
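Reviewer note on the new experimental sharding knobs above: the total number of Prometheus pods is replicas * shards, and each shard scrapes a disjoint subset of targets (hashed on the __address__ meta-label), so global queries need Thanos or remote write, per the upstream comments. A minimal values sketch (illustration only):

    prometheus:
      prometheusSpec:
        replicas: 2   # 2 replicas per shard (HA pairs)
        shards: 2     # 2 shards -> 4 Prometheus pods in total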