feat: Version bump kube-prometheus-stack for 1.21

This commit is contained in:
Stefan Reimer 2022-01-28 17:19:41 +01:00
parent adf8aff57a
commit 0b855110fd
93 changed files with 2110 additions and 1763 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-ci
description: KubeZero umbrella chart for all things CI
type: application
version: 0.4.20
version: 0.4.23
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -26,7 +26,7 @@ dependencies:
repository: https://dl.gitea.io/charts/
condition: gitea.enabled
- name: jenkins
version: 3.10.3
version: 3.11.3
repository: https://charts.jenkins.io
condition: jenkins.enabled
- name: trivy

View File

@ -1,6 +1,6 @@
# kubezero-ci
![Version: 0.4.20](https://img.shields.io/badge/Version-0.4.20-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.4.23](https://img.shields.io/badge/Version-0.4.23-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero umbrella chart for all things CI
@ -20,7 +20,7 @@ Kubernetes: `>= 1.20.0`
|------------|------|---------|
| https://aquasecurity.github.io/helm-charts/ | trivy | 0.4.9 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.5 |
| https://charts.jenkins.io | jenkins | 3.10.3 |
| https://charts.jenkins.io | jenkins | 3.11.3 |
| https://dl.gitea.io/charts/ | gitea | 5.0.0 |
| https://gocd.github.io/helm-chart | gocd | 1.39.4 |
@ -83,7 +83,7 @@ Kubernetes: `>= 1.20.0`
| jenkins.agent.resources.requests.cpu | string | `"512m"` | |
| jenkins.agent.resources.requests.memory | string | `"512Mi"` | |
| jenkins.agent.showRawYaml | bool | `false` | |
| jenkins.agent.tag | string | `"v0.2.4-2"` | |
| jenkins.agent.tag | string | `"v0.2.4-5"` | |
| jenkins.agent.yamlMergeStrategy | string | `"merge"` | |
| jenkins.agent.yamlTemplate | string | `"apiVersion: v1\nkind: Pod\nspec:\n serviceAccountName: jenkins-podman-aws\n containers:\n - name: jnlp\n resources:\n limits:\n github.com/fuse: 1\n volumeMounts:\n - name: aws-token\n mountPath: \"/var/run/secrets/sts.amazonaws.com/serviceaccount/\"\n readOnly: true\n volumes:\n - name: aws-token\n projected:\n sources:\n - serviceAccountToken:\n path: token\n expirationSeconds: 86400\n audience: \"sts.amazonaws.com\""` | |
| jenkins.controller.JCasC.configScripts.zdt-settings | string | `"jenkins:\n noUsageStatistics: true\n disabledAdministrativeMonitors:\n - \"jenkins.security.ResourceDomainRecommendation\"\nunclassified:\n buildDiscarders:\n configuredBuildDiscarders:\n - \"jobBuildDiscarder\"\n - defaultBuildDiscarder:\n discarder:\n logRotator:\n artifactDaysToKeepStr: \"32\"\n artifactNumToKeepStr: \"10\"\n daysToKeepStr: \"100\"\n numToKeepStr: \"10\"\n"` | |
@ -102,6 +102,7 @@ Kubernetes: `>= 1.20.0`
| jenkins.controller.installPlugins[6] | string | `"htmlpublisher:1.28"` | |
| jenkins.controller.installPlugins[7] | string | `"build-discarder:60.v1747b0eb632a"` | |
| jenkins.controller.javaOpts | string | `"-XX:+UseStringDeduplication -Dhudson.model.DirectoryBrowserSupport.CSP=\"sandbox allow-popups; default-src 'none'; img-src 'self' cdn.zero-downtime.net; style-src 'unsafe-inline';\""` | |
| jenkins.controller.jenkinsOpts | string | `"--sessionTimeout=180 --sessionEviction=3600"` | |
| jenkins.controller.prometheus.enabled | bool | `false` | |
| jenkins.controller.resources.limits.cpu | string | `"2000m"` | |
| jenkins.controller.resources.limits.memory | string | `"4096Mi"` | |

View File

@ -16,7 +16,8 @@ spec:
- source:
notIpBlocks:
{{- toYaml .Values.gitea.istio.ipBlocks | nindent 8 }}
when:
- key: connection.sni
values: ["{{ .Values.gitea.istio.url }}"]
to:
- operation:
hosts: ["{{ .Values.gitea.istio.url }}"]
ports: [ "443", "22" ]
{{- end }}

View File

@ -16,7 +16,8 @@ spec:
- source:
notIpBlocks:
{{- toYaml .Values.jenkins.istio.ipBlocks | nindent 8 }}
when:
- key: connection.sni
values: ["{{ .Values.jenkins.istio.url }}"]
to:
- operation:
hosts: ["{{ .Values.jenkins.istio.url }}"]
ports: [ "443" ]
{{- end }}

View File

@ -77,6 +77,7 @@ jenkins:
enableRawHtmlMarkupFormatter: true
# javaOpts: "-Xms512m -Xmx512m"
javaOpts: "-XX:+UseStringDeduplication -Dhudson.model.DirectoryBrowserSupport.CSP=\"sandbox allow-popups; default-src 'none'; img-src 'self' cdn.zero-downtime.net; style-src 'unsafe-inline';\""
jenkinsOpts: "--sessionTimeout=180 --sessionEviction=3600"
resources:
requests:
@ -129,7 +130,7 @@ jenkins:
# Preconfigure agents to use zdt podman requires fuse/overlayfs
agent:
image: public.ecr.aws/zero-downtime/jenkins-podman
tag: v0.2.4-2
tag: v0.2.4-5
resources:
requests:
cpu: "512m"

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-metrics
description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
type: application
version: 0.6.2
version: 0.7.4
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -18,15 +18,15 @@ dependencies:
version: ">= 0.1.4"
repository: https://cdn.zero-downtime.net/charts/
- name: kube-prometheus-stack
version: 23.1.5
version: 30.2.0
# Switch back to upstream once all alerts are fixed eg. etcd gpcr
# repository: https://prometheus-community.github.io/helm-charts
- name: prometheus-adapter
version: 3.0.0
version: 3.0.1
repository: https://prometheus-community.github.io/helm-charts
condition: prometheus-adapter.enabled
- name: prometheus-pushgateway
version: 1.13.0
version: 1.14.0
# Switch back to upstream once namespaces are supported
# repository: https://prometheus-community.github.io/helm-charts
condition: prometheus-pushgateway.enabled

View File

@ -1,6 +1,6 @@
# kubezero-metrics
![Version: 0.5.4](https://img.shields.io/badge/Version-0.5.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.7.0](https://img.shields.io/badge/Version-0.7.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
@ -10,18 +10,18 @@ KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all
| Name | Email | Url |
| ---- | ------ | --- |
| Quarky9 | | |
| Stefan Reimer | stefan@zero-downtime.net | |
## Requirements
Kubernetes: `>= 1.18.0`
Kubernetes: `>= 1.20.0`
| Repository | Name | Version |
|------------|------|---------|
| | kube-prometheus-stack | 18.1.0 |
| | prometheus-pushgateway | 1.10.1 |
| | kube-prometheus-stack | 30.2.0 |
| | prometheus-pushgateway | 1.14.0 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.4 |
| https://prometheus-community.github.io/helm-charts | prometheus-adapter | 2.17 |
| https://prometheus-community.github.io/helm-charts | prometheus-adapter | 3.0.1 |
## Values
@ -84,7 +84,6 @@ Kubernetes: `>= 1.18.0`
| kube-prometheus-stack.alertmanager.enabled | bool | `false` | |
| kube-prometheus-stack.coreDns.enabled | bool | `true` | |
| kube-prometheus-stack.defaultRules.create | bool | `false` | |
| kube-prometheus-stack.global.rbac.pspEnabled | bool | `false` | |
| kube-prometheus-stack.grafana."grafana.ini"."auth.anonymous".enabled | bool | `true` | |
| kube-prometheus-stack.grafana."grafana.ini".alerting.enabled | bool | `false` | |
| kube-prometheus-stack.grafana."grafana.ini".analytics.check_for_updates | bool | `false` | |
@ -102,13 +101,15 @@ Kubernetes: `>= 1.18.0`
| kube-prometheus-stack.grafana.extraContainerVolumes[0].name | string | `"script-volume"` | |
| kube-prometheus-stack.grafana.initChownData.enabled | bool | `false` | |
| kube-prometheus-stack.grafana.plugins[0] | string | `"grafana-piechart-panel"` | |
| kube-prometheus-stack.grafana.rbac.pspEnabled | bool | `false` | |
| kube-prometheus-stack.grafana.service.portName | string | `"http-grafana"` | |
| kube-prometheus-stack.grafana.sidecar.dashboards.extraMounts[0].mountPath | string | `"/opt/script.sh"` | |
| kube-prometheus-stack.grafana.sidecar.dashboards.extraMounts[0].name | string | `"script-volume"` | |
| kube-prometheus-stack.grafana.sidecar.dashboards.extraMounts[0].subPath | string | `"script.sh"` | |
| kube-prometheus-stack.grafana.sidecar.dashboards.provider.foldersFromFilesStructure | bool | `true` | |
| kube-prometheus-stack.grafana.sidecar.dashboards.script | string | `"/opt/script.sh"` | |
| kube-prometheus-stack.grafana.sidecar.dashboards.searchNamespace | string | `"ALL"` | |
| kube-prometheus-stack.grafana.testFramework.enabled | bool | `false` | |
| kube-prometheus-stack.kube-state-metrics.nodeSelector."node-role.kubernetes.io/master" | string | `""` | |
| kube-prometheus-stack.kube-state-metrics.podSecurityPolicy.enabled | bool | `false` | |
| kube-prometheus-stack.kube-state-metrics.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| kube-prometheus-stack.kube-state-metrics.tolerations[0].effect | string | `"NoSchedule"` | |
| kube-prometheus-stack.kube-state-metrics.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| kube-prometheus-stack.kubeApiServer.enabled | bool | `true` | |
@ -131,13 +132,12 @@ Kubernetes: `>= 1.18.0`
| kube-prometheus-stack.kubelet.enabled | bool | `true` | |
| kube-prometheus-stack.kubelet.serviceMonitor.cAdvisor | bool | `true` | |
| kube-prometheus-stack.nodeExporter.enabled | bool | `true` | |
| kube-prometheus-stack.nodeExporter.serviceMonitor.relabelings[0].action | string | `"replace"` | |
| kube-prometheus-stack.nodeExporter.serviceMonitor.relabelings[0].regex | string | `"^(.*)$"` | |
| kube-prometheus-stack.nodeExporter.serviceMonitor.relabelings[0].replacement | string | `"$1"` | |
| kube-prometheus-stack.nodeExporter.serviceMonitor.relabelings[0].separator | string | `";"` | |
| kube-prometheus-stack.nodeExporter.serviceMonitor.relabelings[0].sourceLabels[0] | string | `"__meta_kubernetes_pod_node_name"` | |
| kube-prometheus-stack.nodeExporter.serviceMonitor.relabelings[0].targetLabel | string | `"node"` | |
| kube-prometheus-stack.prometheus-node-exporter.rbac.pspEnabled | bool | `false` | |
| kube-prometheus-stack.prometheus-node-exporter.prometheus.monitor.serviceMonitor.relabelings[0].action | string | `"replace"` | |
| kube-prometheus-stack.prometheus-node-exporter.prometheus.monitor.serviceMonitor.relabelings[0].regex | string | `"^(.*)$"` | |
| kube-prometheus-stack.prometheus-node-exporter.prometheus.monitor.serviceMonitor.relabelings[0].replacement | string | `"$1"` | |
| kube-prometheus-stack.prometheus-node-exporter.prometheus.monitor.serviceMonitor.relabelings[0].separator | string | `";"` | |
| kube-prometheus-stack.prometheus-node-exporter.prometheus.monitor.serviceMonitor.relabelings[0].sourceLabels[0] | string | `"__meta_kubernetes_pod_node_name"` | |
| kube-prometheus-stack.prometheus-node-exporter.prometheus.monitor.serviceMonitor.relabelings[0].targetLabel | string | `"node"` | |
| kube-prometheus-stack.prometheus-node-exporter.resources.requests.cpu | string | `"20m"` | |
| kube-prometheus-stack.prometheus-node-exporter.resources.requests.memory | string | `"16Mi"` | |
| kube-prometheus-stack.prometheus.enabled | bool | `true` | |
@ -153,12 +153,12 @@ Kubernetes: `>= 1.18.0`
| kube-prometheus-stack.prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.accessModes[0] | string | `"ReadWriteOnce"` | |
| kube-prometheus-stack.prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.resources.requests.storage | string | `"16Gi"` | |
| kube-prometheus-stack.prometheus.prometheusSpec.walCompression | bool | `true` | |
| kube-prometheus-stack.prometheusOperator.admissionWebhooks.patch.nodeSelector."node-role.kubernetes.io/master" | string | `""` | |
| kube-prometheus-stack.prometheusOperator.admissionWebhooks.patch.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| kube-prometheus-stack.prometheusOperator.admissionWebhooks.patch.tolerations[0].effect | string | `"NoSchedule"` | |
| kube-prometheus-stack.prometheusOperator.admissionWebhooks.patch.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| kube-prometheus-stack.prometheusOperator.enabled | bool | `true` | |
| kube-prometheus-stack.prometheusOperator.logFormat | string | `"json"` | |
| kube-prometheus-stack.prometheusOperator.nodeSelector."node-role.kubernetes.io/master" | string | `""` | |
| kube-prometheus-stack.prometheusOperator.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| kube-prometheus-stack.prometheusOperator.resources.limits.memory | string | `"64Mi"` | |
| kube-prometheus-stack.prometheusOperator.resources.requests.cpu | string | `"20m"` | |
| kube-prometheus-stack.prometheusOperator.resources.requests.memory | string | `"32Mi"` | |
@ -166,7 +166,7 @@ Kubernetes: `>= 1.18.0`
| kube-prometheus-stack.prometheusOperator.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| prometheus-adapter.enabled | bool | `true` | |
| prometheus-adapter.logLevel | int | `1` | |
| prometheus-adapter.nodeSelector."node-role.kubernetes.io/master" | string | `""` | |
| prometheus-adapter.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| prometheus-adapter.prometheus.url | string | `"http://metrics-kube-prometheus-st-prometheus"` | |
| prometheus-adapter.rules.default | bool | `false` | |
| prometheus-adapter.rules.resource.cpu.containerLabel | string | `"container"` | |

View File

@ -6,20 +6,20 @@ annotations:
url: https://github.com/prometheus-operator/kube-prometheus
artifacthub.io/operator: "true"
apiVersion: v2
appVersion: 0.52.0
appVersion: 0.53.1
dependencies:
- condition: kubeStateMetrics.enabled
name: kube-state-metrics
repository: https://prometheus-community.github.io/helm-charts
version: 4.1.*
version: 4.4.*
- condition: nodeExporter.enabled
name: prometheus-node-exporter
repository: https://prometheus-community.github.io/helm-charts
version: 2.2.*
version: 2.5.*
- condition: grafana.enabled
name: grafana
repository: https://grafana.github.io/helm-charts
version: 6.18.*
version: 6.21.*
description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards,
and Prometheus rules combined with documentation and scripts to provide easy to
operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus
@ -32,7 +32,11 @@ keywords:
- kube-prometheus
kubeVersion: '>=1.16.0-0'
maintainers:
- email: andrew@quadcorps.co.uk
name: andrewgkew
- name: bismarck
- email: cedric@desaintmartin.fr
name: desaintmartin
- email: gianrubio@gmail.com
name: gianrubio
- email: github.gkarthiks@gmail.com
@ -46,4 +50,4 @@ sources:
- https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus
type: application
version: 23.1.5
version: 30.2.0

View File

@ -83,6 +83,61 @@ _See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documen
A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions.
### From 29.x to 30.x
This version updates kube-state-metrics to 4.3.0 and uses the new option `kube-state-metrics.releaseLabel=true` which adds the "release" label to kube-state-metrics labels, making scraping of the metrics by kube-prometheus-stack work out of the box again, independent of the used kube-prometheus-stack release name. If you already set the "release" label via `kube-state-metrics.customLabels` you might have to remove that and use it via the new option.
### From 28.x to 29.x
This version makes scraping port for kube-controller-manager and kube-scheduler dynamic to reflect changes to default serving ports
for those components in Kubernetes versions v1.22 and v1.23 respectively.
If you deploy on clusters using version v1.22+, kube-controller-manager will be scraped over HTTPS on port 10257.
If you deploy on clusters running version v1.23+, kube-scheduler will be scraped over HTTPS on port 10259.
### From 27.x to 28.x
This version disables PodSecurityPolicies by default because they are deprecated in Kubernetes 1.21 and will be removed in Kubernetes 1.25.
If you are using PodSecurityPolicies you can enable the previous behaviour by setting `kube-state-metrics.podSecurityPolicy.enabled`, `prometheus-node-exporter.rbac.pspEnabled`, `grafana.rbac.pspEnabled` and `global.rbac.pspEnabled` to `true`.
### From 26.x to 27.x
This version splits Node Exporter recording and altering rules in separate config values.
Instead of `defaultRules.rules.node` the 2 new variables `defaultRules.rules.nodeExporterAlerting` and `defaultRules.rules.nodeExporterRecording` are used.
Also the following defaultRules.rules has been removed as they had no effect: `kubeApiserverError`, `kubePrometheusNodeAlerting`, `kubernetesAbsent`, `time`.
The ability to set a rubookUrl via `defaultRules.rules.rubookUrl` was reintroduced.
### From 25.x to 26.x
This version enables the prometheus-node-exporter subchart servicemonitor by default again, by setting `prometheus-node-exporter.prometheus.monitor.enabled` to `true`.
### From 24.x to 25.x
This version upgrade to prometheus-operator v0.53.1. It removes support for setting a runbookUrl, since the upstream format for runbooks changed.
```console
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
```
### From 23.x to 24.x
The custom `ServiceMonitor` for the _kube-state-metrics_ & _prometheus-node-exporter_ charts have been removed in favour of the built in sub-chart `ServiceMonitor`; for both sub-charts this means that `ServiceMonitor` customisations happen via the values passed to the chart. If you haven't directly customised this behaviour then there are no changes required to upgrade, but if you have please read the following.
For _kube-state-metrics_ the `ServiceMonitor` customisation is now set via `kube-state-metrics.prometheus.monitor` and the `kubeStateMetrics.serviceMonitor.selfMonitor.enabled` value has moved to `kube-state-metrics.selfMonitor.enabled`.
For _prometheus-node-exporter_ the `ServiceMonitor` customisation is now set via `prometheus-node-exporter.prometheus.monitor` and the `nodeExporter.jobLabel` values has moved to `prometheus-node-exporter.prometheus.monitor.jobLabel`.
### From 22.x to 23.x
Port names have been renamed for Istio's
@ -331,9 +386,9 @@ For more in-depth documentation of configuration options meanings, please see
The prometheus operator does not support annotation-based discovery of services, using the `PodMonitor` or `ServiceMonitor` CRD in its place as they provide far more configuration options.
For information on how to use PodMonitors/ServiceMonitors, please see the documentation on the `prometheus-operator/prometheus-operator` documentation here:
- [ServiceMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/getting-started.md#include-servicemonitors)
- [PodMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/getting-started.md#include-podmonitors)
- [Running Exporters](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/running-exporters.md)
- [ServiceMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#include-servicemonitors)
- [PodMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#include-podmonitors)
- [Running Exporters](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/running-exporters.md)
By default, Prometheus discovers PodMonitors and ServiceMonitors within its namespace, that are labeled with the same release tag as the prometheus-operator release.
Sometimes, you may need to discover custom PodMonitors/ServiceMonitors, for example used to scrape data from third-party applications.

View File

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 8.3.1
appVersion: 8.3.4
description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net
icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png
@ -19,4 +19,4 @@ name: grafana
sources:
- https://github.com/grafana/grafana
type: application
version: 6.18.2
version: 6.21.0

View File

@ -138,7 +138,7 @@ This version requires Helm >= 3.1.0.
| `podLabels` | Pod labels | `{}` |
| `podPortName` | Name of the grafana port on the pod | `grafana` |
| `sidecar.image.repository` | Sidecar image repository | `quay.io/kiwigrid/k8s-sidecar` |
| `sidecar.image.tag` | Sidecar image tag | `1.12.3` |
| `sidecar.image.tag` | Sidecar image tag | `1.15.1` |
| `sidecar.image.sha` | Sidecar image sha (optional) | `""` |
| `sidecar.imagePullPolicy` | Sidecar image pull policy | `IfNotPresent` |
| `sidecar.resources` | Sidecar resources | `{}` |
@ -169,6 +169,8 @@ This version requires Helm >= 3.1.0.
| `sidecar.datasources.labelValue` | Label value that config maps with datasources should have to be added | `nil` |
| `sidecar.datasources.searchNamespace` | Namespaces list. If specified, the sidecar will search for datasources config-maps inside these namespaces.Otherwise the namespace in which the sidecar is running will be used.It's also possible to specify ALL to search in all namespaces. | `nil` |
| `sidecar.datasources.resource` | Should the sidecar looks into secrets, configmaps or both. | `both` |
| `sidecar.datasources.reloadURL` | Full url of datasource configuration reload API endpoint, to invoke after a config-map change | `"http://localhost:3000/api/admin/provisioning/datasources/reload"` |
| `sidecar.datasources.skipReload` | Enabling this omits defining the REQ_URL and REQ_METHOD environment variables | `false` |
| `sidecar.notifiers.enabled` | Enables the cluster wide search for notifiers and adds/updates/deletes them in grafana | `false` |
| `sidecar.notifiers.label` | Label that config maps with notifiers should have to be added | `grafana_notifier` |
| `sidecar.notifiers.searchNamespace` | Namespaces list. If specified, the sidecar will search for notifiers config-maps (or secrets) inside these namespaces.Otherwise the namespace in which the sidecar is running will be used.It's also possible to specify ALL to search in all namespaces. | `nil` |
@ -226,8 +228,9 @@ This version requires Helm >= 3.1.0.
| `imageRenderer.hostAliases` | image-renderer deployment Host Aliases | `[]` |
| `imageRenderer.priorityClassName` | image-renderer deployment priority class | `''` |
| `imageRenderer.service.enabled` | Enable the image-renderer service | `true` |
| `imageRenderer.service.portName` | image-renderer service port name | `'http'` |
| `imageRenderer.service.portName` | image-renderer service port name | `http` |
| `imageRenderer.service.port` | image-renderer service port used by both service and deployment | `8081` |
| `imageRenderer.grafanaProtocol` | Protocol to use for image renderer callback url | `http` |
| `imageRenderer.grafanaSubPath` | Grafana sub path to use for image renderer callback url | `''` |
| `imageRenderer.podPortName` | name of the image-renderer port on the pod | `http` |
| `imageRenderer.revisionHistoryLimit` | number of image-renderer replica sets to keep | `10` |
@ -237,6 +240,7 @@ This version requires Helm >= 3.1.0.
| `networkPolicy.enabled` | Enable creation of NetworkPolicy resources. | `false` |
| `networkPolicy.allowExternal` | Don't require client label for connections | `true` |
| `networkPolicy.explicitNamespacesSelector` | A Kubernetes LabelSelector to explicitly select namespaces from which traffic could be allowed | `{}` |
| `enableKubeBackwardCompatibility` | Enable backward compatibility of kubernetes where pod's defintion version below 1.13 doesn't have the enableServiceLinks option | `false` |
@ -525,7 +529,7 @@ This example uses a CSI driver e.g. retrieving secrets using [Azure Key Vault Pr
## Image Renderer Plug-In
This chart supports enabling [remote image rendering](https://github.com/grafana/grafana-image-renderer/blob/master/docs/remote_rendering_using_docker.md)
This chart supports enabling [remote image rendering](https://github.com/grafana/grafana-image-renderer/blob/master/README.md#run-in-docker)
```yaml
imageRenderer:

View File

@ -16,7 +16,7 @@ hostAliases:
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
{{- if ( or .Values.persistence.enabled .Values.dashboards .Values.sidecar.datasources.enabled .Values.sidecar.notifiers.enabled .Values.extraInitContainers) }}
{{- if ( or .Values.persistence.enabled .Values.dashboards .Values.sidecar.notifiers.enabled .Values.extraInitContainers) }}
initContainers:
{{- end }}
{{- if ( and .Values.persistence.enabled .Values.initChownData.enabled ) }}
@ -77,49 +77,6 @@ initContainers:
readOnly: {{ .readOnly }}
{{- end }}
{{- end }}
{{- if .Values.sidecar.datasources.enabled }}
- name: {{ template "grafana.name" . }}-sc-datasources
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
- name: METHOD
value: {{ .Values.sidecar.datasources.watchMethod }}
- name: LABEL
value: "{{ .Values.sidecar.datasources.label }}"
{{- if .Values.sidecar.datasources.labelValue }}
- name: LABEL_VALUE
value: {{ quote .Values.sidecar.datasources.labelValue }}
{{- end }}
- name: FOLDER
value: "/etc/grafana/provisioning/datasources"
- name: RESOURCE
value: {{ quote .Values.sidecar.datasources.resource }}
{{- if .Values.sidecar.enableUniqueFilenames }}
- name: UNIQUE_FILENAMES
value: "{{ .Values.sidecar.enableUniqueFilenames }}"
{{- end }}
{{- if .Values.sidecar.datasources.searchNamespace }}
- name: NAMESPACE
value: "{{ .Values.sidecar.datasources.searchNamespace | join "," }}"
{{- end }}
{{- if .Values.sidecar.skipTlsVerify }}
- name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}"
{{- end }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-datasources-volume
mountPath: "/etc/grafana/provisioning/datasources"
{{- end}}
{{- if .Values.sidecar.notifiers.enabled }}
- name: {{ template "grafana.name" . }}-sc-notifiers
{{- if .Values.sidecar.image.sha }}
@ -168,7 +125,9 @@ imagePullSecrets:
- name: {{ . }}
{{- end}}
{{- end }}
{{- if not .Values.enableKubeBackwardCompatibility }}
enableServiceLinks: {{ .Values.enableServiceLinks }}
{{- end }}
containers:
{{- if .Values.sidecar.dashboards.enabled }}
- name: {{ template "grafana.name" . }}-sc-dashboard
@ -223,6 +182,69 @@ containers:
{{- if .Values.sidecar.dashboards.extraMounts }}
{{- toYaml .Values.sidecar.dashboards.extraMounts | trim | nindent 6}}
{{- end }}
{{- end}}
{{- if .Values.sidecar.datasources.enabled }}
- name: {{ template "grafana.name" . }}-sc-datasources
{{- if .Values.sidecar.image.sha }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}"
{{- else }}
image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}"
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
- name: METHOD
value: {{ .Values.sidecar.datasources.watchMethod }}
- name: LABEL
value: "{{ .Values.sidecar.datasources.label }}"
{{- if .Values.sidecar.datasources.labelValue }}
- name: LABEL_VALUE
value: {{ quote .Values.sidecar.datasources.labelValue }}
{{- end }}
- name: FOLDER
value: "/etc/grafana/provisioning/datasources"
- name: RESOURCE
value: {{ quote .Values.sidecar.datasources.resource }}
{{- if .Values.sidecar.enableUniqueFilenames }}
- name: UNIQUE_FILENAMES
value: "{{ .Values.sidecar.enableUniqueFilenames }}"
{{- end }}
{{- if .Values.sidecar.datasources.searchNamespace }}
- name: NAMESPACE
value: "{{ .Values.sidecar.datasources.searchNamespace | join "," }}"
{{- end }}
{{- if .Values.sidecar.skipTlsVerify }}
- name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}"
{{- end }}
{{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }}
- name: REQ_USERNAME
valueFrom:
secretKeyRef:
name: {{ .Values.admin.existingSecret | default (include "grafana.fullname" .) }}
key: {{ .Values.admin.userKey | default "admin-user" }}
{{- end }}
{{- if and (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }}
- name: REQ_PASSWORD
valueFrom:
secretKeyRef:
name: {{ .Values.admin.existingSecret | default (include "grafana.fullname" .) }}
key: {{ .Values.admin.passwordKey | default "admin-password" }}
{{- end }}
{{- if not .Values.sidecar.datasources.skipReload }}
- name: REQ_URL
value: {{ .Values.sidecar.datasources.reloadURL }}
- name: REQ_METHOD
value: POST
{{- end }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-datasources-volume
mountPath: "/etc/grafana/provisioning/datasources"
{{- end}}
- name: {{ .Chart.Name }}
{{- if .Values.image.sha }}
@ -373,12 +395,12 @@ containers:
name: {{ .Values.smtp.existingSecret }}
key: {{ .Values.smtp.passwordKey | default "password" }}
{{- end }}
{{ if .Values.imageRenderer.enabled }}
{{- if .Values.imageRenderer.enabled }}
- name: GF_RENDERING_SERVER_URL
value: http://{{ template "grafana.fullname" . }}-image-renderer.{{ template "grafana.namespace" . }}:{{ .Values.imageRenderer.service.port }}/render
- name: GF_RENDERING_CALLBACK_URL
value: http://{{ template "grafana.fullname" . }}.{{ template "grafana.namespace" . }}:{{ .Values.service.port }}/{{ .Values.imageRenderer.grafanaSubPath }}
{{ end }}
value: {{ .Values.imageRenderer.grafanaProtocol }}://{{ template "grafana.fullname" . }}.{{ template "grafana.namespace" . }}:{{ .Values.service.port }}/{{ .Values.imageRenderer.grafanaSubPath }}
{{- end }}
- name: GF_PATHS_DATA
value: {{ (get .Values "grafana.ini").paths.data }}
- name: GF_PATHS_LOGS

View File

@ -9,9 +9,6 @@ metadata:
labels:
{{- include "grafana.labels" $ | nindent 4 }}
dashboard-provider: {{ $provider }}
{{- if $.Values.sidecar.dashboards.label }}
{{ $.Values.sidecar.dashboards.label }}: "1"
{{- end }}
{{- if $dashboards }}
data:
{{- $dashboardFound := false }}

View File

@ -14,7 +14,7 @@ metadata:
{{ toYaml . | indent 4 }}
{{- end }}
spec:
{{- if not .Values.autoscaling.enabled }}
{{- if and (not .Values.autoscaling.enabled) (.Values.replicas) }}
replicas: {{ .Values.replicas }}
{{- end }}
revisionHistoryLimit: {{ .Values.revisionHistoryLimit }}

View File

@ -0,0 +1,4 @@
{{ range .Values.extraObjects }}
---
{{ tpl (toYaml .) $ }}
{{ end }}

View File

@ -73,7 +73,7 @@ livenessProbe:
image:
repository: grafana/grafana
tag: 8.3.1
tag: 8.3.4
sha: ""
pullPolicy: IfNotPresent
@ -618,7 +618,7 @@ smtp:
sidecar:
image:
repository: quay.io/kiwigrid/k8s-sidecar
tag: 1.14.2
tag: 1.15.1
sha: ""
imagePullPolicy: IfNotPresent
resources: {}
@ -685,9 +685,12 @@ sidecar:
# It's also possible to specify ALL to search in all namespaces
searchNamespace: null
# Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds.
watchMethod: LIST
watchMethod: WATCH
# search in configmap, secret or both
resource: both
# Endpoint to send request to reload datasources
reloadURL: "http://localhost:3000/api/admin/provisioning/datasources/reload"
skipReload: false
notifiers:
enabled: false
# label that the configmaps with notifiers are marked with
@ -726,6 +729,7 @@ imageRenderer:
HTTP_HOST: "0.0.0.0"
# RENDERING_ARGS: --no-sandbox,--disable-gpu,--window-size=1280x758
# RENDERING_MODE: clustered
# IGNORE_HTTPS_ERRORS: true
# image-renderer deployment serviceAccount
serviceAccountName: ""
# image-renderer deployment securityContext
@ -742,6 +746,8 @@ imageRenderer:
# image-renderer service port used by both service and deployment
port: 8081
targetPort: 8081
# If https is enabled in Grafana, this needs to be set as 'https' to correctly configure the callback used in Grafana
grafanaProtocol: http
# In case a sub_path is used this needs to be added to the image renderer callback
grafanaSubPath: ""
# name of the image-renderer port on the pod
@ -786,3 +792,18 @@ networkPolicy:
## - {key: role, operator: In, values: [frontend]}
##
explicitNamespacesSelector: {}
# Enable backward compatibility of kubernetes where version below 1.13 doesn't have the enableServiceLinks option
enableKubeBackwardCompatibility: false
# Create a dynamic manifests via values:
extraObjects: []
# - apiVersion: "kubernetes-client.io/v1"
# kind: ExternalSecret
# metadata:
# name: grafana-secrets
# spec:
# backendType: gcpSecretsManager
# data:
# - key: grafana-admin-password
# name: adminPassword

View File

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.2.4
appVersion: 2.3.0
description: Install kube-state-metrics to generate and expose cluster-level metrics
home: https://github.com/kubernetes/kube-state-metrics/
keywords:
@ -12,8 +12,10 @@ maintainers:
name: tariq1890
- email: manuel@rueg.eu
name: mrueg
- email: davidcalvertfr@gmail.com
name: dotdc
name: kube-state-metrics
sources:
- https://github.com/kubernetes/kube-state-metrics/
type: application
version: 4.1.1
version: 4.4.1

View File

@ -1,6 +0,0 @@
approvers:
- tariq1890
- mrueg
reviewers:
- tariq1890
- mrueg

View File

@ -57,7 +57,7 @@ Create chart name and version as used by the chart label.
Generate basic labels
*/}}
{{- define "kube-state-metrics.labels" }}
helm.sh/chart: {{ include "kube-state-metrics.chart" . }}
helm.sh/chart: {{ template "kube-state-metrics.chart" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: {{ template "kube-state-metrics.name" . }}
@ -68,6 +68,9 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- if .Values.customLabels }}
{{ toYaml .Values.customLabels }}
{{- end }}
{{- if .Values.releaseLabel }}
release: {{ .Release.Name }}
{{- end }}
{{- end }}
{{/*

View File

@ -9,16 +9,7 @@ kind: ClusterRole
{{- end }}
metadata:
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" $ }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/component: metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: "{{ $.Chart.Version }}"
app.kubernetes.io/part-of: {{ template "kube-state-metrics.name" $ }}
helm.sh/chart: {{ $.Chart.Name }}-{{ $.Chart.Version }}
{{- if $.Values.customLabels }}
{{ toYaml $.Values.customLabels | nindent 4 }}
{{- end }}
{{- include "kube-state-metrics.labels" $ | indent 4 }}
name: {{ template "kube-state-metrics.fullname" $ }}
{{- if eq $.Values.rbac.useClusterRole false }}
namespace: {{ . }}

View File

@ -5,7 +5,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
{{- include "kube-state-metrics.labels" . | indent 4 }}
{{- include "kube-state-metrics.labels" $ | indent 4 }}
name: {{ template "kube-state-metrics.fullname" $ }}
namespace: {{ . }}
roleRef:

View File

@ -10,10 +10,14 @@ metadata:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
jobLabel: app.kubernetes.io/name
jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }}
selector:
matchLabels:
{{- if .Values.prometheus.monitor.selectorOverride }}
{{ toYaml .Values.prometheus.monitor.selectorOverride | indent 6 }}
{{ else }}
{{- include "kube-state-metrics.selectorLabels" . | indent 6 }}
{{- end }}
endpoints:
- port: http
{{- if .Values.prometheus.monitor.interval }}

View File

@ -2,7 +2,7 @@
prometheusScrape: true
image:
repository: k8s.gcr.io/kube-state-metrics/kube-state-metrics
tag: v2.2.4
tag: v2.3.0
pullPolicy: IfNotPresent
imagePullSecrets: []
@ -34,6 +34,9 @@ service:
customLabels: {}
# app: kube-state-metrics
## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box
releaseLabel: false
hostNetwork: false
rbac:
@ -65,9 +68,11 @@ prometheus:
enabled: false
additionalLabels: {}
namespace: ""
jobLabel: ""
interval: ""
scrapeTimeout: ""
proxyUrl: ""
selectorOverride: {}
honorLabels: false
metricRelabelings: []
relabelings: []

View File

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 1.2.2
appVersion: 1.3.1
description: A Helm chart for prometheus node-exporter
home: https://github.com/prometheus/node_exporter/
keywords:
@ -14,4 +14,4 @@ name: prometheus-node-exporter
sources:
- https://github.com/prometheus/node_exporter/
type: application
version: 2.2.2
version: 2.5.0

View File

@ -54,6 +54,13 @@ Create the name of the service account to use
{{- end -}}
{{- end -}}
{{/*
The image to use
*/}}
{{- define "prometheus-node-exporter.image" -}}
{{- printf "%s:%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }}
{{- end }}
{{/*
Allow the release namespace to be overridden for multi-namespace deployments in combined charts
*/}}

View File

@ -36,7 +36,7 @@ spec:
{{- end }}
containers:
- name: node-exporter
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
image: {{ include "prometheus-node-exporter.image" . }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
args:
- --path.procfs=/host/proc

View File

@ -1,35 +0,0 @@
{{- if .Values.prometheus.monitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ template "prometheus-node-exporter.fullname" . }}
namespace: {{ template "prometheus-node-exporter.monitor-namespace" . }}
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
{{- if .Values.prometheus.monitor.additionalLabels }}
{{ toYaml .Values.prometheus.monitor.additionalLabels | indent 4 }}
{{- end }}
spec:
selector:
matchLabels:
app: {{ template "prometheus-node-exporter.name" . }}
release: {{ .Release.Name }}
endpoints:
- port: {{ .Values.service.portName }}
scheme: {{ $.Values.prometheus.monitor.scheme }}
{{- if $.Values.prometheus.monitor.bearerTokenFile }}
bearerTokenFile: {{ $.Values.prometheus.monitor.bearerTokenFile }}
{{- end }}
{{- if $.Values.prometheus.monitor.tlsConfig }}
tlsConfig: {{ toYaml $.Values.prometheus.monitor.tlsConfig | nindent 8 }}
{{- end }}
{{- if .Values.prometheus.monitor.proxyUrl }}
proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}}
{{- end }}
{{- if .Values.prometheus.monitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }}
{{- end }}
{{- if .Values.prometheus.monitor.relabelings }}
relabelings:
{{ toYaml .Values.prometheus.monitor.relabelings | indent 6 }}
{{- end }}
{{- end }}

View File

@ -12,12 +12,6 @@ metadata:
{{- end}}
spec:
privileged: false
# Required to prevent escalations to root.
# allowPrivilegeEscalation: false
# This is redundant with non-root + disallow privilege escalation,
# but we can provide it for defense in depth.
#requiredDropCapabilities:
# - ALL
# Allow core volume types.
volumes:
- 'configMap'
@ -42,13 +36,13 @@ spec:
supplementalGroups:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
fsGroup:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
readOnlyRootFilesystem: false

View File

@ -0,0 +1,48 @@
{{- if .Values.prometheus.monitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ template "prometheus-node-exporter.fullname" . }}
namespace: {{ template "prometheus-node-exporter.monitor-namespace" . }}
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
{{- if .Values.prometheus.monitor.additionalLabels }}
{{- toYaml .Values.prometheus.monitor.additionalLabels | nindent 4 }}
{{- end }}
spec:
jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }}
selector:
matchLabels:
{{- if .Values.prometheus.monitor.selectorOverride }}
{{ toYaml .Values.prometheus.monitor.selectorOverride | indent 6 }}
{{ else }}
app: {{ template "prometheus-node-exporter.name" . }}
release: {{ .Release.Name }}
{{- end }}
endpoints:
- port: {{ .Values.service.portName }}
scheme: {{ .Values.prometheus.monitor.scheme }}
{{- with .Values.prometheus.monitor.bearerTokenFile }}
bearerTokenFile: {{ . }}
{{- end }}
{{- with .Values.prometheus.monitor.tlsConfig }}
tlsConfig:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.prometheus.monitor.proxyUrl }}
proxyUrl: {{ . }}
{{- end }}
{{- with .Values.prometheus.monitor.interval }}
interval: {{ . }}
{{- end }}
{{- with .Values.prometheus.monitor.scrapeTimeout }}
scrapeTimeout: {{ . }}
{{- end }}
{{- with .Values.prometheus.monitor.relabelings }}
relabelings:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.prometheus.monitor.metricRelabelings }}
metricRelabelings:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@ -3,7 +3,8 @@
# Declare variables to be passed into your templates.
image:
repository: quay.io/prometheus/node-exporter
tag: v1.2.2
# Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }}
tag: ""
pullPolicy: IfNotPresent
service:
@ -21,6 +22,9 @@ prometheus:
enabled: false
additionalLabels: {}
namespace: ""
jobLabel: ""
scheme: http
bearerTokenFile:
tlsConfig: {}
@ -29,7 +33,13 @@ prometheus:
##
proxyUrl: ""
## Override serviceMonitor selector
##
selectorOverride: {}
relabelings: []
metricRelabelings: []
interval: ""
scrapeTimeout: 10s
## Customize the updateStrategy if set

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
---
apiVersion: apiextensions.k8s.io/v1
@ -63,13 +63,24 @@ spec:
items:
description: Matcher defines how to match on alert's labels.
properties:
matchType:
description: Match operation available with AlertManager
>= v0.22.0 and takes precedence over Regex (deprecated)
if non-empty.
enum:
- '!='
- =
- =~
- '!~'
type: string
name:
description: Label to match.
minLength: 1
type: string
regex:
description: Whether to match on equality (false) or regular-expression
(true).
(true). Deprecated as of AlertManager >= v0.22.0 where
a user should use MatchType instead.
type: boolean
value:
description: Label value to match.
@ -85,13 +96,24 @@ spec:
items:
description: Matcher defines how to match on alert's labels.
properties:
matchType:
description: Match operation available with AlertManager
>= v0.22.0 and takes precedence over Regex (deprecated)
if non-empty.
enum:
- '!='
- =
- =~
- '!~'
type: string
name:
description: Label to match.
minLength: 1
type: string
regex:
description: Whether to match on equality (false) or regular-expression
(true).
(true). Deprecated as of AlertManager >= v0.22.0 where
a user should use MatchType instead.
type: boolean
value:
description: Label value to match.
@ -102,6 +124,88 @@ spec:
type: array
type: object
type: array
muteTimeIntervals:
description: List of MuteTimeInterval specifying when the routes should
be muted.
items:
description: MuteTimeInterval specifies the periods in time when
notifications will be muted
properties:
name:
description: Name of the time interval
type: string
timeIntervals:
description: TimeIntervals is a list of TimeInterval
items:
description: TimeInterval describes intervals of time
properties:
daysOfMonth:
description: DaysOfMonth is a list of DayOfMonthRange
items:
description: DayOfMonthRange is an inclusive range of
days of the month beginning at 1
properties:
end:
description: End of the inclusive range
maximum: 31
minimum: -31
type: integer
start:
description: Start of the inclusive range
maximum: 31
minimum: -31
type: integer
type: object
type: array
months:
description: Months is a list of MonthRange
items:
description: MonthRange is an inclusive range of months
of the year beginning in January Months can be specified
by name (e.g 'January') by numerical month (e.g '1')
or as an inclusive range (e.g 'January:March', '1:3',
'1:March')
pattern: ^((?i)january|february|march|april|may|june|july|august|september|october|november|december|[1-12])(?:((:((?i)january|february|march|april|may|june|july|august|september|october|november|december|[1-12]))$)|$)
type: string
type: array
times:
description: Times is a list of TimeRange
items:
description: TimeRange defines a start and end time
in 24hr format
properties:
endTime:
description: EndTime is the end time in 24hr format.
pattern: ^((([01][0-9])|(2[0-3])):[0-5][0-9])$|(^24:00$)
type: string
startTime:
description: StartTime is the start time in 24hr
format.
pattern: ^((([01][0-9])|(2[0-3])):[0-5][0-9])$|(^24:00$)
type: string
type: object
type: array
weekdays:
description: Weekdays is a list of WeekdayRange
items:
description: WeekdayRange is an inclusive range of days
of the week beginning on Sunday Days can be specified
by name (e.g 'Sunday') or as an inclusive range (e.g
'Monday:Friday')
pattern: ^((?i)sun|mon|tues|wednes|thurs|fri|satur)day(?:((:(sun|mon|tues|wednes|thurs|fri|satur)day)$)|$)
type: string
type: array
years:
description: Years is a list of YearRange
items:
description: YearRange is an inclusive range of years
pattern: ^2\d{3}(?::2\d{3}|$)
type: string
type: array
type: object
type: array
type: object
type: array
receivers:
description: List of receivers.
items:
@ -952,6 +1056,43 @@ spec:
type: string
type: object
type: object
pagerDutyImageConfigs:
description: A list of image details to attach that provide
further detail about an incident.
items:
description: PagerDutyImageConfig attaches images to
an incident
properties:
alt:
description: Alt is the optional alternative text
for the image.
type: string
href:
description: Optional URL; makes the image a clickable
link.
type: string
src:
description: Src of the image being attached to
the incident
type: string
type: object
type: array
pagerDutyLinkConfigs:
description: A list of link details to attach that provide
further detail about an incident.
items:
description: PagerDutyLinkConfig attaches text links
to an incident
properties:
alt:
description: Text that describes the purpose of
the link, and can be used as the link's text.
type: string
href:
description: Href is the URL of the link to be attached
type: string
type: object
type: array
routingKey:
description: The secret's key that contains the PagerDuty
integration key (when using Events API v2). Either this
@ -1020,6 +1161,7 @@ spec:
description: How long your notification will continue
to be retried for, unless the user acknowledges the
notification.
pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$
type: string
html:
description: Whether notification message is HTML or plain
@ -1272,6 +1414,7 @@ spec:
description: How often the Pushover servers will send
the same notification to the user. Must be at least
30 seconds.
pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$
type: string
sendResolved:
description: Whether or not to notify about resolved alerts.
@ -2614,19 +2757,21 @@ spec:
to true for the first-level route by the Prometheus operator.
type: boolean
groupBy:
description: List of labels to group by.
description: List of labels to group by. Labels must not be repeated
(unique list). Special label "..." (aggregate by all possible
labels), if provided, must be the only element in the list.
items:
type: string
type: array
groupInterval:
description: How long to wait before sending an updated notification.
Must match the regular expression `[0-9]+(ms|s|m|h)` (milliseconds
seconds minutes hours).
description: 'How long to wait before sending an updated notification.
Must match the regular expression`^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$`
Example: "5m"'
type: string
groupWait:
description: How long to wait before sending the initial notification.
Must match the regular expression `[0-9]+(ms|s|m|h)` (milliseconds
seconds minutes hours).
description: 'How long to wait before sending the initial notification.
Must match the regular expression`^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$`
Example: "30s"'
type: string
matchers:
description: 'List of matchers that the alerts labels should
@ -2636,13 +2781,24 @@ spec:
items:
description: Matcher defines how to match on alert's labels.
properties:
matchType:
description: Match operation available with AlertManager
>= v0.22.0 and takes precedence over Regex (deprecated)
if non-empty.
enum:
- '!='
- =
- =~
- '!~'
type: string
name:
description: Label to match.
minLength: 1
type: string
regex:
description: Whether to match on equality (false) or regular-expression
(true).
(true). Deprecated as of AlertManager >= v0.22.0 where
a user should use MatchType instead.
type: boolean
value:
description: Label value to match.
@ -2651,14 +2807,27 @@ spec:
- name
type: object
type: array
muteTimeIntervals:
description: 'Note: this comment applies to the field definition
above but appears below otherwise it gets included in the generated
manifest. CRD schema doesn''t support self-referential types
for now (see https://github.com/kubernetes/kubernetes/issues/62872).
We have to use an alternative type to circumvent the limitation.
The downside is that the Kube API can''t validate the data beyond
the fact that it is a valid JSON representation. MuteTimeIntervals
is a list of MuteTimeInterval names that will mute this route
when matched,'
items:
type: string
type: array
receiver:
description: Name of the receiver for this route. If not empty,
it should be listed in the `receivers` field.
type: string
repeatInterval:
description: How long to wait before repeating the last notification.
Must match the regular expression `[0-9]+(ms|s|m|h)` (milliseconds
seconds minutes hours).
description: 'How long to wait before repeating the last notification.
Must match the regular expression`^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$`
Example: "4h"'
type: string
routes:
description: Child routes.

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
---
apiVersion: apiextensions.k8s.io/v1
@ -1225,8 +1225,7 @@ spec:
info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1287,9 +1286,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -1312,18 +1312,16 @@ spec:
is terminated due to an API request or management event
such as liveness/startup probe failure, preemption, resource
contention, etc. The handler is not called if the container
crashes or exits. The reason for termination is passed
to the handler. The Pod''s termination grace period countdown
begins before the PreStop hooked is executed. Regardless
of the outcome of the handler, the container will eventually
terminate within the Pod''s termination grace period.
Other management of the container blocks until the hook
completes or until the termination grace period is reached.
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
crashes or exits. The Pod''s termination grace period
countdown begins before the PreStop hook is executed.
Regardless of the outcome of the handler, the container
will eventually terminate within the Pod''s termination
grace period (unless delayed by finalizers). Other management
of the container blocks until the hook completes or until
the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1384,9 +1382,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -1411,8 +1410,7 @@ spec:
info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1433,6 +1431,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -1496,9 +1513,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -1596,8 +1612,7 @@ spec:
fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1618,6 +1633,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -1681,9 +1715,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -1764,12 +1797,14 @@ spec:
This bool directly controls if the no_new_privs flag will
be set on the container process. AllowPrivilegeEscalation
is true always when the container is: 1) run as Privileged
2) has CAP_SYS_ADMIN'
2) has CAP_SYS_ADMIN Note that this field cannot be set
when spec.os.name is windows.'
type: boolean
capabilities:
description: The capabilities to add/drop when running containers.
Defaults to the default set of capabilities granted by
the container runtime.
the container runtime. Note that this field cannot be
set when spec.os.name is windows.
properties:
add:
description: Added capabilities
@ -1789,25 +1824,29 @@ spec:
privileged:
description: Run container in privileged mode. Processes
in privileged containers are essentially equivalent to
root on the host. Defaults to false.
root on the host. Defaults to false. Note that this field
cannot be set when spec.os.name is windows.
type: boolean
procMount:
description: procMount denotes the type of proc mount to
use for the containers. The default is DefaultProcMount
which uses the container runtime defaults for readonly
paths and masked paths. This requires the ProcMountType
feature flag to be enabled.
feature flag to be enabled. Note that this field cannot
be set when spec.os.name is windows.
type: string
readOnlyRootFilesystem:
description: Whether this container has a read-only root
filesystem. Default is false.
filesystem. Default is false. Note that this field cannot
be set when spec.os.name is windows.
type: boolean
runAsGroup:
description: The GID to run the entrypoint of the container
process. Uses runtime default if unset. May also be set
in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -1825,7 +1864,8 @@ spec:
process. Defaults to user specified in image metadata
if unspecified. May also be set in PodSecurityContext. If
set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence.
value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -1834,7 +1874,8 @@ spec:
random SELinux context for each container. May also be
set in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies
@ -1857,6 +1898,8 @@ spec:
description: The seccomp options to use by this container.
If seccomp options are provided at both the pod & container
level, the container options override the pod options.
Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -1882,6 +1925,8 @@ spec:
containers. If unspecified, the options from the PodSecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
Note that this field cannot be set when spec.os.name is
linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -1927,8 +1972,7 @@ spec:
This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1949,6 +1993,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -2012,9 +2075,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -2419,8 +2481,7 @@ spec:
info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2481,9 +2542,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -2506,18 +2568,16 @@ spec:
is terminated due to an API request or management event
such as liveness/startup probe failure, preemption, resource
contention, etc. The handler is not called if the container
crashes or exits. The reason for termination is passed
to the handler. The Pod''s termination grace period countdown
begins before the PreStop hooked is executed. Regardless
of the outcome of the handler, the container will eventually
terminate within the Pod''s termination grace period.
Other management of the container blocks until the hook
completes or until the termination grace period is reached.
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
crashes or exits. The Pod''s termination grace period
countdown begins before the PreStop hook is executed.
Regardless of the outcome of the handler, the container
will eventually terminate within the Pod''s termination
grace period (unless delayed by finalizers). Other management
of the container blocks until the hook completes or until
the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2578,9 +2638,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -2605,8 +2666,7 @@ spec:
info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2627,6 +2687,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -2690,9 +2769,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -2790,8 +2868,7 @@ spec:
fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2812,6 +2889,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -2875,9 +2971,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -2958,12 +3053,14 @@ spec:
This bool directly controls if the no_new_privs flag will
be set on the container process. AllowPrivilegeEscalation
is true always when the container is: 1) run as Privileged
2) has CAP_SYS_ADMIN'
2) has CAP_SYS_ADMIN Note that this field cannot be set
when spec.os.name is windows.'
type: boolean
capabilities:
description: The capabilities to add/drop when running containers.
Defaults to the default set of capabilities granted by
the container runtime.
the container runtime. Note that this field cannot be
set when spec.os.name is windows.
properties:
add:
description: Added capabilities
@ -2983,25 +3080,29 @@ spec:
privileged:
description: Run container in privileged mode. Processes
in privileged containers are essentially equivalent to
root on the host. Defaults to false.
root on the host. Defaults to false. Note that this field
cannot be set when spec.os.name is windows.
type: boolean
procMount:
description: procMount denotes the type of proc mount to
use for the containers. The default is DefaultProcMount
which uses the container runtime defaults for readonly
paths and masked paths. This requires the ProcMountType
feature flag to be enabled.
feature flag to be enabled. Note that this field cannot
be set when spec.os.name is windows.
type: string
readOnlyRootFilesystem:
description: Whether this container has a read-only root
filesystem. Default is false.
filesystem. Default is false. Note that this field cannot
be set when spec.os.name is windows.
type: boolean
runAsGroup:
description: The GID to run the entrypoint of the container
process. Uses runtime default if unset. May also be set
in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -3019,7 +3120,8 @@ spec:
process. Defaults to user specified in image metadata
if unspecified. May also be set in PodSecurityContext. If
set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence.
value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -3028,7 +3130,8 @@ spec:
random SELinux context for each container. May also be
set in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies
@ -3051,6 +3154,8 @@ spec:
description: The seccomp options to use by this container.
If seccomp options are provided at both the pod & container
level, the container options override the pod options.
Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -3076,6 +3181,8 @@ spec:
containers. If unspecified, the options from the PodSecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
Note that this field cannot be set when spec.os.name is
linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -3121,8 +3228,7 @@ spec:
This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -3143,6 +3249,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -3206,9 +3331,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -3486,7 +3610,8 @@ spec:
set (new files created in the volume will be owned by FSGroup)
3. The permission bits are OR'd with rw-rw---- \n If unset,
the Kubelet will not modify the ownership and permissions of
any volume."
any volume. Note that this field cannot be set when spec.os.name
is windows."
format: int64
type: integer
fsGroupChangePolicy:
@ -3496,13 +3621,15 @@ spec:
support fsGroup based ownership(and permissions). It will have
no effect on ephemeral volume types such as: secret, configmaps
and emptydir. Valid values are "OnRootMismatch" and "Always".
If not specified, "Always" is used.'
If not specified, "Always" is used. Note that this field cannot
be set when spec.os.name is windows.'
type: string
runAsGroup:
description: The GID to run the entrypoint of the container process.
Uses runtime default if unset. May also be set in SecurityContext. If
set in both SecurityContext and PodSecurityContext, the value
specified in SecurityContext takes precedence for that container.
Note that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -3519,7 +3646,8 @@ spec:
Defaults to user specified in image metadata if unspecified.
May also be set in SecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence for that container.
takes precedence for that container. Note that this field cannot
be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -3528,6 +3656,7 @@ spec:
SELinux context for each container. May also be set in SecurityContext. If
set in both SecurityContext and PodSecurityContext, the value
specified in SecurityContext takes precedence for that container.
Note that this field cannot be set when spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies to
@ -3548,7 +3677,8 @@ spec:
type: object
seccompProfile:
description: The seccomp options to use by the containers in this
pod.
pod. Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -3570,7 +3700,8 @@ spec:
supplementalGroups:
description: A list of groups applied to the first process run
in each container, in addition to the container's primary GID. If
unspecified, no groups will be added to any container.
unspecified, no groups will be added to any container. Note
that this field cannot be set when spec.os.name is windows.
items:
format: int64
type: integer
@ -3578,7 +3709,8 @@ spec:
sysctls:
description: Sysctls hold a list of namespaced sysctls used for
the pod. Pods with unsupported sysctls (by the container runtime)
might fail to launch.
might fail to launch. Note that this field cannot be set when
spec.os.name is windows.
items:
description: Sysctl defines a kernel parameter to be set
properties:
@ -3597,7 +3729,8 @@ spec:
description: The Windows specific settings applied to all containers.
If unspecified, the options within a container's SecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
the value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -3792,7 +3925,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify
resource requirements that are lower than previous
value but must still be higher than capacity recorded
in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:
@ -4007,7 +4144,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify resource
requirements that are lower than previous value but
must still be higher than capacity recorded in the status
field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:
@ -4103,6 +4244,27 @@ spec:
items:
type: string
type: array
allocatedResources:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: The storage resource within AllocatedResources
tracks the capacity allocated to a PVC. It may be larger
than the actual capacity when a volume expansion operation
is requested. For storage quota, the larger value from
allocatedResources and PVC.spec.resources is used. If
allocatedResources is not set, PVC.spec.resources alone
is used for quota calculation. If a volume expansion
capacity request is lowered, allocatedResources is only
lowered if there are no expansion operations in progress
and if the actual volume capacity is equal or lower
than the requested capacity. This is an alpha field
and requires enabling RecoverVolumeExpansionFailure
feature.
type: object
capacity:
additionalProperties:
anyOf:
@ -4155,6 +4317,13 @@ spec:
phase:
description: Phase represents the current phase of PersistentVolumeClaim.
type: string
resizeStatus:
description: ResizeStatus stores status of resize operation.
ResizeStatus is not set by default but when expansion
is complete resizeStatus is set to empty string by resize
controller or kubelet. This is an alpha field and requires
enabling RecoverVolumeExpansionFailure feature.
type: string
type: object
type: object
type: object
@ -4287,7 +4456,7 @@ spec:
tells the scheduler to schedule the pod in any location, but
giving higher precedence to topologies that would help reduce
the skew. A constraint is considered "Unsatisfiable" for
an incoming pod if and only if every possible node assigment
an incoming pod if and only if every possible node assignment
for that pod would violate "MaxSkew" on some topology. For
example, in a 3-zone cluster, MaxSkew is set to 1, and pods
with the same labelSelector spread as 3/1/1: | zone1 | zone2
@ -4750,9 +4919,7 @@ spec:
volumes if the CSI driver is meant to be used that way - see
the documentation of the driver for more information. \n A
pod can use both types of ephemeral volumes and persistent
volumes at the same time. \n This is a beta feature and only
available when the GenericEphemeralVolume feature gate is
enabled."
volumes at the same time."
properties:
volumeClaimTemplate:
description: "Will be used to create a stand-alone PVC to
@ -4869,7 +5036,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify
resource requirements that are lower than previous
value but must still be higher than capacity recorded
in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
---
apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
---
apiVersion: apiextensions.k8s.io/v1
@ -1640,8 +1640,7 @@ spec:
info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1702,9 +1701,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -1727,18 +1727,16 @@ spec:
is terminated due to an API request or management event
such as liveness/startup probe failure, preemption, resource
contention, etc. The handler is not called if the container
crashes or exits. The reason for termination is passed
to the handler. The Pod''s termination grace period countdown
begins before the PreStop hooked is executed. Regardless
of the outcome of the handler, the container will eventually
terminate within the Pod''s termination grace period.
Other management of the container blocks until the hook
completes or until the termination grace period is reached.
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
crashes or exits. The Pod''s termination grace period
countdown begins before the PreStop hook is executed.
Regardless of the outcome of the handler, the container
will eventually terminate within the Pod''s termination
grace period (unless delayed by finalizers). Other management
of the container blocks until the hook completes or until
the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1799,9 +1797,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -1826,8 +1825,7 @@ spec:
info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1848,6 +1846,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -1911,9 +1928,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -2011,8 +2027,7 @@ spec:
fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2033,6 +2048,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -2096,9 +2130,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -2179,12 +2212,14 @@ spec:
This bool directly controls if the no_new_privs flag will
be set on the container process. AllowPrivilegeEscalation
is true always when the container is: 1) run as Privileged
2) has CAP_SYS_ADMIN'
2) has CAP_SYS_ADMIN Note that this field cannot be set
when spec.os.name is windows.'
type: boolean
capabilities:
description: The capabilities to add/drop when running containers.
Defaults to the default set of capabilities granted by
the container runtime.
the container runtime. Note that this field cannot be
set when spec.os.name is windows.
properties:
add:
description: Added capabilities
@ -2204,25 +2239,29 @@ spec:
privileged:
description: Run container in privileged mode. Processes
in privileged containers are essentially equivalent to
root on the host. Defaults to false.
root on the host. Defaults to false. Note that this field
cannot be set when spec.os.name is windows.
type: boolean
procMount:
description: procMount denotes the type of proc mount to
use for the containers. The default is DefaultProcMount
which uses the container runtime defaults for readonly
paths and masked paths. This requires the ProcMountType
feature flag to be enabled.
feature flag to be enabled. Note that this field cannot
be set when spec.os.name is windows.
type: string
readOnlyRootFilesystem:
description: Whether this container has a read-only root
filesystem. Default is false.
filesystem. Default is false. Note that this field cannot
be set when spec.os.name is windows.
type: boolean
runAsGroup:
description: The GID to run the entrypoint of the container
process. Uses runtime default if unset. May also be set
in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -2240,7 +2279,8 @@ spec:
process. Defaults to user specified in image metadata
if unspecified. May also be set in PodSecurityContext. If
set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence.
value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -2249,7 +2289,8 @@ spec:
random SELinux context for each container. May also be
set in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies
@ -2272,6 +2313,8 @@ spec:
description: The seccomp options to use by this container.
If seccomp options are provided at both the pod & container
level, the container options override the pod options.
Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -2297,6 +2340,8 @@ spec:
containers. If unspecified, the options from the PodSecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
Note that this field cannot be set when spec.os.name is
linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -2342,8 +2387,7 @@ spec:
This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2364,6 +2408,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -2427,9 +2490,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -2925,8 +2987,7 @@ spec:
info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2987,9 +3048,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -3012,18 +3074,16 @@ spec:
is terminated due to an API request or management event
such as liveness/startup probe failure, preemption, resource
contention, etc. The handler is not called if the container
crashes or exits. The reason for termination is passed
to the handler. The Pod''s termination grace period countdown
begins before the PreStop hooked is executed. Regardless
of the outcome of the handler, the container will eventually
terminate within the Pod''s termination grace period.
Other management of the container blocks until the hook
completes or until the termination grace period is reached.
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
crashes or exits. The Pod''s termination grace period
countdown begins before the PreStop hook is executed.
Regardless of the outcome of the handler, the container
will eventually terminate within the Pod''s termination
grace period (unless delayed by finalizers). Other management
of the container blocks until the hook completes or until
the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -3084,9 +3144,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -3111,8 +3172,7 @@ spec:
info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -3133,6 +3193,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -3196,9 +3275,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -3296,8 +3374,7 @@ spec:
fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -3318,6 +3395,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -3381,9 +3477,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -3464,12 +3559,14 @@ spec:
This bool directly controls if the no_new_privs flag will
be set on the container process. AllowPrivilegeEscalation
is true always when the container is: 1) run as Privileged
2) has CAP_SYS_ADMIN'
2) has CAP_SYS_ADMIN Note that this field cannot be set
when spec.os.name is windows.'
type: boolean
capabilities:
description: The capabilities to add/drop when running containers.
Defaults to the default set of capabilities granted by
the container runtime.
the container runtime. Note that this field cannot be
set when spec.os.name is windows.
properties:
add:
description: Added capabilities
@ -3489,25 +3586,29 @@ spec:
privileged:
description: Run container in privileged mode. Processes
in privileged containers are essentially equivalent to
root on the host. Defaults to false.
root on the host. Defaults to false. Note that this field
cannot be set when spec.os.name is windows.
type: boolean
procMount:
description: procMount denotes the type of proc mount to
use for the containers. The default is DefaultProcMount
which uses the container runtime defaults for readonly
paths and masked paths. This requires the ProcMountType
feature flag to be enabled.
feature flag to be enabled. Note that this field cannot
be set when spec.os.name is windows.
type: string
readOnlyRootFilesystem:
description: Whether this container has a read-only root
filesystem. Default is false.
filesystem. Default is false. Note that this field cannot
be set when spec.os.name is windows.
type: boolean
runAsGroup:
description: The GID to run the entrypoint of the container
process. Uses runtime default if unset. May also be set
in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -3525,7 +3626,8 @@ spec:
process. Defaults to user specified in image metadata
if unspecified. May also be set in PodSecurityContext. If
set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence.
value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -3534,7 +3636,8 @@ spec:
random SELinux context for each container. May also be
set in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies
@ -3557,6 +3660,8 @@ spec:
description: The seccomp options to use by this container.
If seccomp options are provided at both the pod & container
level, the container options override the pod options.
Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -3582,6 +3687,8 @@ spec:
containers. If unspecified, the options from the PodSecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
Note that this field cannot be set when spec.os.name is
linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -3627,8 +3734,7 @@ spec:
This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -3649,6 +3755,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -3712,9 +3837,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -4261,6 +4385,14 @@ spec:
bearerTokenFile:
description: File to read bearer token for remote read.
type: string
headers:
additionalProperties:
type: string
description: Custom HTTP headers to be sent along with each
remote read request. Be aware that headers that are set by
Prometheus itself can't be overwritten. Only valid in Prometheus
versions 2.26.0 and newer.
type: object
name:
description: The name of the remote read queue, must be unique
if specified. The name is used in metrics and logging in order
@ -4736,6 +4868,11 @@ spec:
description: MinShards is the minimum number of shards,
i.e. amount of concurrency.
type: integer
retryOnRateLimit:
description: Retry upon receiving a 429 status code from
the remote-write storage. This is experimental feature
and might change in the future.
type: boolean
type: object
remoteTimeout:
description: Timeout for requests to the remote write endpoint.
@ -5173,7 +5310,8 @@ spec:
set (new files created in the volume will be owned by FSGroup)
3. The permission bits are OR'd with rw-rw---- \n If unset,
the Kubelet will not modify the ownership and permissions of
any volume."
any volume. Note that this field cannot be set when spec.os.name
is windows."
format: int64
type: integer
fsGroupChangePolicy:
@ -5183,13 +5321,15 @@ spec:
support fsGroup based ownership(and permissions). It will have
no effect on ephemeral volume types such as: secret, configmaps
and emptydir. Valid values are "OnRootMismatch" and "Always".
If not specified, "Always" is used.'
If not specified, "Always" is used. Note that this field cannot
be set when spec.os.name is windows.'
type: string
runAsGroup:
description: The GID to run the entrypoint of the container process.
Uses runtime default if unset. May also be set in SecurityContext. If
set in both SecurityContext and PodSecurityContext, the value
specified in SecurityContext takes precedence for that container.
Note that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -5206,7 +5346,8 @@ spec:
Defaults to user specified in image metadata if unspecified.
May also be set in SecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence for that container.
takes precedence for that container. Note that this field cannot
be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -5215,6 +5356,7 @@ spec:
SELinux context for each container. May also be set in SecurityContext. If
set in both SecurityContext and PodSecurityContext, the value
specified in SecurityContext takes precedence for that container.
Note that this field cannot be set when spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies to
@ -5235,7 +5377,8 @@ spec:
type: object
seccompProfile:
description: The seccomp options to use by the containers in this
pod.
pod. Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -5257,7 +5400,8 @@ spec:
supplementalGroups:
description: A list of groups applied to the first process run
in each container, in addition to the container's primary GID. If
unspecified, no groups will be added to any container.
unspecified, no groups will be added to any container. Note
that this field cannot be set when spec.os.name is windows.
items:
format: int64
type: integer
@ -5265,7 +5409,8 @@ spec:
sysctls:
description: Sysctls hold a list of namespaced sysctls used for
the pod. Pods with unsupported sysctls (by the container runtime)
might fail to launch.
might fail to launch. Note that this field cannot be set when
spec.os.name is windows.
items:
description: Sysctl defines a kernel parameter to be set
properties:
@ -5284,7 +5429,8 @@ spec:
description: The Windows specific settings applied to all containers.
If unspecified, the options within a container's SecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
the value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -5580,7 +5726,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify
resource requirements that are lower than previous
value but must still be higher than capacity recorded
in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:
@ -5795,7 +5945,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify resource
requirements that are lower than previous value but
must still be higher than capacity recorded in the status
field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:
@ -5891,6 +6045,27 @@ spec:
items:
type: string
type: array
allocatedResources:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: The storage resource within AllocatedResources
tracks the capacity allocated to a PVC. It may be larger
than the actual capacity when a volume expansion operation
is requested. For storage quota, the larger value from
allocatedResources and PVC.spec.resources is used. If
allocatedResources is not set, PVC.spec.resources alone
is used for quota calculation. If a volume expansion
capacity request is lowered, allocatedResources is only
lowered if there are no expansion operations in progress
and if the actual volume capacity is equal or lower
than the requested capacity. This is an alpha field
and requires enabling RecoverVolumeExpansionFailure
feature.
type: object
capacity:
additionalProperties:
anyOf:
@ -5943,6 +6118,13 @@ spec:
phase:
description: Phase represents the current phase of PersistentVolumeClaim.
type: string
resizeStatus:
description: ResizeStatus stores status of resize operation.
ResizeStatus is not set by default but when expansion
is complete resizeStatus is set to empty string by resize
controller or kubelet. This is an alpha field and requires
enabling RecoverVolumeExpansionFailure feature.
type: string
type: object
type: object
type: object
@ -6380,7 +6562,7 @@ spec:
tells the scheduler to schedule the pod in any location, but
giving higher precedence to topologies that would help reduce
the skew. A constraint is considered "Unsatisfiable" for
an incoming pod if and only if every possible node assigment
an incoming pod if and only if every possible node assignment
for that pod would violate "MaxSkew" on some topology. For
example, in a 3-zone cluster, MaxSkew is set to 1, and pods
with the same labelSelector spread as 3/1/1: | zone1 | zone2
@ -6843,9 +7025,7 @@ spec:
volumes if the CSI driver is meant to be used that way - see
the documentation of the driver for more information. \n A
pod can use both types of ephemeral volumes and persistent
volumes at the same time. \n This is a beta feature and only
available when the GenericEphemeralVolume feature gate is
enabled."
volumes at the same time."
properties:
volumeClaimTemplate:
description: "Will be used to create a stand-alone PVC to
@ -6962,7 +7142,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify
resource requirements that are lower than previous
value but must still be higher than capacity recorded
in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
---
apiVersion: apiextensions.k8s.io/v1
@ -46,7 +46,7 @@ spec:
description: 'RuleGroup is a list of sequentially evaluated recording
and alerting rules. Note: PartialResponseStrategy is only used
by ThanosRuler and will be ignored by Prometheus instances. Valid
values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response'
properties:
interval:
type: string

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
---
apiVersion: apiextensions.k8s.io/v1
@ -1152,8 +1152,7 @@ spec:
info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1214,9 +1213,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -1239,18 +1239,16 @@ spec:
is terminated due to an API request or management event
such as liveness/startup probe failure, preemption, resource
contention, etc. The handler is not called if the container
crashes or exits. The reason for termination is passed
to the handler. The Pod''s termination grace period countdown
begins before the PreStop hooked is executed. Regardless
of the outcome of the handler, the container will eventually
terminate within the Pod''s termination grace period.
Other management of the container blocks until the hook
completes or until the termination grace period is reached.
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
crashes or exits. The Pod''s termination grace period
countdown begins before the PreStop hook is executed.
Regardless of the outcome of the handler, the container
will eventually terminate within the Pod''s termination
grace period (unless delayed by finalizers). Other management
of the container blocks until the hook completes or until
the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1311,9 +1309,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -1338,8 +1337,7 @@ spec:
info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1360,6 +1358,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -1423,9 +1440,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -1523,8 +1539,7 @@ spec:
fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1545,6 +1560,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -1608,9 +1642,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -1691,12 +1724,14 @@ spec:
This bool directly controls if the no_new_privs flag will
be set on the container process. AllowPrivilegeEscalation
is true always when the container is: 1) run as Privileged
2) has CAP_SYS_ADMIN'
2) has CAP_SYS_ADMIN Note that this field cannot be set
when spec.os.name is windows.'
type: boolean
capabilities:
description: The capabilities to add/drop when running containers.
Defaults to the default set of capabilities granted by
the container runtime.
the container runtime. Note that this field cannot be
set when spec.os.name is windows.
properties:
add:
description: Added capabilities
@ -1716,25 +1751,29 @@ spec:
privileged:
description: Run container in privileged mode. Processes
in privileged containers are essentially equivalent to
root on the host. Defaults to false.
root on the host. Defaults to false. Note that this field
cannot be set when spec.os.name is windows.
type: boolean
procMount:
description: procMount denotes the type of proc mount to
use for the containers. The default is DefaultProcMount
which uses the container runtime defaults for readonly
paths and masked paths. This requires the ProcMountType
feature flag to be enabled.
feature flag to be enabled. Note that this field cannot
be set when spec.os.name is windows.
type: string
readOnlyRootFilesystem:
description: Whether this container has a read-only root
filesystem. Default is false.
filesystem. Default is false. Note that this field cannot
be set when spec.os.name is windows.
type: boolean
runAsGroup:
description: The GID to run the entrypoint of the container
process. Uses runtime default if unset. May also be set
in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -1752,7 +1791,8 @@ spec:
process. Defaults to user specified in image metadata
if unspecified. May also be set in PodSecurityContext. If
set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence.
value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -1761,7 +1801,8 @@ spec:
random SELinux context for each container. May also be
set in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies
@ -1784,6 +1825,8 @@ spec:
description: The seccomp options to use by this container.
If seccomp options are provided at both the pod & container
level, the container options override the pod options.
Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -1809,6 +1852,8 @@ spec:
containers. If unspecified, the options from the PodSecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
Note that this field cannot be set when spec.os.name is
linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -1854,8 +1899,7 @@ spec:
This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -1876,6 +1920,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -1939,9 +2002,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -2465,8 +2527,7 @@ spec:
info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2527,9 +2588,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -2552,18 +2614,16 @@ spec:
is terminated due to an API request or management event
such as liveness/startup probe failure, preemption, resource
contention, etc. The handler is not called if the container
crashes or exits. The reason for termination is passed
to the handler. The Pod''s termination grace period countdown
begins before the PreStop hooked is executed. Regardless
of the outcome of the handler, the container will eventually
terminate within the Pod''s termination grace period.
Other management of the container blocks until the hook
completes or until the termination grace period is reached.
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
crashes or exits. The Pod''s termination grace period
countdown begins before the PreStop hook is executed.
Regardless of the outcome of the handler, the container
will eventually terminate within the Pod''s termination
grace period (unless delayed by finalizers). Other management
of the container blocks until the hook completes or until
the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks'
properties:
exec:
description: One and only one of the following should
be specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2624,9 +2684,10 @@ spec:
- port
type: object
tcpSocket:
description: 'TCPSocket specifies an action involving
a TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: Deprecated. TCPSocket is NOT supported
as a LifecycleHandler and kept for the backward compatibility.
There are no validation of this field and lifecycle
hooks will fail in runtime when tcp handler is specified.
properties:
host:
description: 'Optional: Host name to connect to,
@ -2651,8 +2712,7 @@ spec:
info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2673,6 +2733,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -2736,9 +2815,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -2836,8 +2914,7 @@ spec:
fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -2858,6 +2935,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -2921,9 +3017,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -3004,12 +3099,14 @@ spec:
This bool directly controls if the no_new_privs flag will
be set on the container process. AllowPrivilegeEscalation
is true always when the container is: 1) run as Privileged
2) has CAP_SYS_ADMIN'
2) has CAP_SYS_ADMIN Note that this field cannot be set
when spec.os.name is windows.'
type: boolean
capabilities:
description: The capabilities to add/drop when running containers.
Defaults to the default set of capabilities granted by
the container runtime.
the container runtime. Note that this field cannot be
set when spec.os.name is windows.
properties:
add:
description: Added capabilities
@ -3029,25 +3126,29 @@ spec:
privileged:
description: Run container in privileged mode. Processes
in privileged containers are essentially equivalent to
root on the host. Defaults to false.
root on the host. Defaults to false. Note that this field
cannot be set when spec.os.name is windows.
type: boolean
procMount:
description: procMount denotes the type of proc mount to
use for the containers. The default is DefaultProcMount
which uses the container runtime defaults for readonly
paths and masked paths. This requires the ProcMountType
feature flag to be enabled.
feature flag to be enabled. Note that this field cannot
be set when spec.os.name is windows.
type: string
readOnlyRootFilesystem:
description: Whether this container has a read-only root
filesystem. Default is false.
filesystem. Default is false. Note that this field cannot
be set when spec.os.name is windows.
type: boolean
runAsGroup:
description: The GID to run the entrypoint of the container
process. Uses runtime default if unset. May also be set
in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -3065,7 +3166,8 @@ spec:
process. Defaults to user specified in image metadata
if unspecified. May also be set in PodSecurityContext. If
set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence.
value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -3074,7 +3176,8 @@ spec:
random SELinux context for each container. May also be
set in PodSecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence.
takes precedence. Note that this field cannot be set when
spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies
@ -3097,6 +3200,8 @@ spec:
description: The seccomp options to use by this container.
If seccomp options are provided at both the pod & container
level, the container options override the pod options.
Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -3122,6 +3227,8 @@ spec:
containers. If unspecified, the options from the PodSecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
Note that this field cannot be set when spec.os.name is
linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -3167,8 +3274,7 @@ spec:
This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
properties:
exec:
description: One and only one of the following should be
specified. Exec specifies the action to take.
description: Exec specifies the action to take.
properties:
command:
description: Command is the command line to execute
@ -3189,6 +3295,25 @@ spec:
to 3. Minimum value is 1.
format: int32
type: integer
grpc:
description: GRPC specifies an action involving a GRPC port.
This is an alpha field and requires enabling GRPCContainerProbe
feature gate.
properties:
port:
description: Port number of the gRPC service. Number
must be in the range 1 to 65535.
format: int32
type: integer
service:
description: "Service is the name of the service to
place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
\n If this is not specified, the default behavior
is defined by gRPC."
type: string
required:
- port
type: object
httpGet:
description: HTTPGet specifies the http request to perform.
properties:
@ -3252,9 +3377,8 @@ spec:
format: int32
type: integer
tcpSocket:
description: 'TCPSocket specifies an action involving a
TCP port. TCP hooks not yet supported TODO: implement
a realistic TCP lifecycle hook'
description: TCPSocket specifies an action involving a TCP
port.
properties:
host:
description: 'Optional: Host name to connect to, defaults
@ -3688,7 +3812,8 @@ spec:
set (new files created in the volume will be owned by FSGroup)
3. The permission bits are OR'd with rw-rw---- \n If unset,
the Kubelet will not modify the ownership and permissions of
any volume."
any volume. Note that this field cannot be set when spec.os.name
is windows."
format: int64
type: integer
fsGroupChangePolicy:
@ -3698,13 +3823,15 @@ spec:
support fsGroup based ownership(and permissions). It will have
no effect on ephemeral volume types such as: secret, configmaps
and emptydir. Valid values are "OnRootMismatch" and "Always".
If not specified, "Always" is used.'
If not specified, "Always" is used. Note that this field cannot
be set when spec.os.name is windows.'
type: string
runAsGroup:
description: The GID to run the entrypoint of the container process.
Uses runtime default if unset. May also be set in SecurityContext. If
set in both SecurityContext and PodSecurityContext, the value
specified in SecurityContext takes precedence for that container.
Note that this field cannot be set when spec.os.name is windows.
format: int64
type: integer
runAsNonRoot:
@ -3721,7 +3848,8 @@ spec:
Defaults to user specified in image metadata if unspecified.
May also be set in SecurityContext. If set in both SecurityContext
and PodSecurityContext, the value specified in SecurityContext
takes precedence for that container.
takes precedence for that container. Note that this field cannot
be set when spec.os.name is windows.
format: int64
type: integer
seLinuxOptions:
@ -3730,6 +3858,7 @@ spec:
SELinux context for each container. May also be set in SecurityContext. If
set in both SecurityContext and PodSecurityContext, the value
specified in SecurityContext takes precedence for that container.
Note that this field cannot be set when spec.os.name is windows.
properties:
level:
description: Level is SELinux level label that applies to
@ -3750,7 +3879,8 @@ spec:
type: object
seccompProfile:
description: The seccomp options to use by the containers in this
pod.
pod. Note that this field cannot be set when spec.os.name is
windows.
properties:
localhostProfile:
description: localhostProfile indicates a profile defined
@ -3772,7 +3902,8 @@ spec:
supplementalGroups:
description: A list of groups applied to the first process run
in each container, in addition to the container's primary GID. If
unspecified, no groups will be added to any container.
unspecified, no groups will be added to any container. Note
that this field cannot be set when spec.os.name is windows.
items:
format: int64
type: integer
@ -3780,7 +3911,8 @@ spec:
sysctls:
description: Sysctls hold a list of namespaced sysctls used for
the pod. Pods with unsupported sysctls (by the container runtime)
might fail to launch.
might fail to launch. Note that this field cannot be set when
spec.os.name is windows.
items:
description: Sysctl defines a kernel parameter to be set
properties:
@ -3799,7 +3931,8 @@ spec:
description: The Windows specific settings applied to all containers.
If unspecified, the options within a container's SecurityContext
will be used. If set in both SecurityContext and PodSecurityContext,
the value specified in SecurityContext takes precedence.
the value specified in SecurityContext takes precedence. Note
that this field cannot be set when spec.os.name is linux.
properties:
gmsaCredentialSpec:
description: GMSACredentialSpec is where the GMSA admission
@ -3986,7 +4119,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify
resource requirements that are lower than previous
value but must still be higher than capacity recorded
in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:
@ -4201,7 +4338,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify resource
requirements that are lower than previous value but
must still be higher than capacity recorded in the status
field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:
@ -4297,6 +4438,27 @@ spec:
items:
type: string
type: array
allocatedResources:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: The storage resource within AllocatedResources
tracks the capacity allocated to a PVC. It may be larger
than the actual capacity when a volume expansion operation
is requested. For storage quota, the larger value from
allocatedResources and PVC.spec.resources is used. If
allocatedResources is not set, PVC.spec.resources alone
is used for quota calculation. If a volume expansion
capacity request is lowered, allocatedResources is only
lowered if there are no expansion operations in progress
and if the actual volume capacity is equal or lower
than the requested capacity. This is an alpha field
and requires enabling RecoverVolumeExpansionFailure
feature.
type: object
capacity:
additionalProperties:
anyOf:
@ -4349,6 +4511,13 @@ spec:
phase:
description: Phase represents the current phase of PersistentVolumeClaim.
type: string
resizeStatus:
description: ResizeStatus stores status of resize operation.
ResizeStatus is not set by default but when expansion
is complete resizeStatus is set to empty string by resize
controller or kubelet. This is an alpha field and requires
enabling RecoverVolumeExpansionFailure feature.
type: string
type: object
type: object
type: object
@ -4912,9 +5081,7 @@ spec:
volumes if the CSI driver is meant to be used that way - see
the documentation of the driver for more information. \n A
pod can use both types of ephemeral volumes and persistent
volumes at the same time. \n This is a beta feature and only
available when the GenericEphemeralVolume feature gate is
enabled."
volumes at the same time."
properties:
volumeClaimTemplate:
description: "Will be used to create a stand-alone PVC to
@ -5031,7 +5198,11 @@ spec:
type: object
resources:
description: 'Resources represents the minimum resources
the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
the volume should have. If RecoverVolumeExpansionFailure
feature is enabled users are allowed to specify
resource requirements that are lower than previous
value but must still be higher than capacity recorded
in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources'
properties:
limits:
additionalProperties:

View File

@ -164,3 +164,39 @@ Use the prometheus-node-exporter namespace override for multi-namespace deployme
{{- print "policy/v1beta1" -}}
{{- end -}}
{{- end -}}
{{/* Get value based on current Kubernetes version */}}
{{- define "kube-prometheus-stack.kubeVersionDefaultValue" -}}
{{- $values := index . 0 -}}
{{- $kubeVersion := index . 1 -}}
{{- $old := index . 2 -}}
{{- $new := index . 3 -}}
{{- $default := index . 4 -}}
{{- if kindIs "invalid" $default -}}
{{- if semverCompare $kubeVersion (include "kube-prometheus-stack.kubeVersion" $values) -}}
{{- print $new -}}
{{- else -}}
{{- print $old -}}
{{- end -}}
{{- else -}}
{{- print $default }}
{{- end -}}
{{- end -}}
{{/* Get value for kube-controller-manager depending on insecure scraping availability */}}
{{- define "kube-prometheus-stack.kubeControllerManager.insecureScrape" -}}
{{- $values := index . 0 -}}
{{- $insecure := index . 1 -}}
{{- $secure := index . 2 -}}
{{- $userValue := index . 3 -}}
{{- include "kube-prometheus-stack.kubeVersionDefaultValue" (list $values ">= 1.22-0" $insecure $secure $userValue) -}}
{{- end -}}
{{/* Get value for kube-scheduler depending on insecure scraping availability */}}
{{- define "kube-prometheus-stack.kubeScheduler.insecureScrape" -}}
{{- $values := index . 0 -}}
{{- $insecure := index . 1 -}}
{{- $secure := index . 2 -}}
{{- $userValue := index . 3 -}}
{{- include "kube-prometheus-stack.kubeVersionDefaultValue" (list $values ">= 1.23-0" $insecure $secure $userValue) -}}
{{- end -}}

View File

@ -13,7 +13,15 @@ metadata:
{{- end }}
spec:
{{- if .Values.alertmanager.alertmanagerSpec.image }}
image: {{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }}
{{- if and .Values.alertmanager.alertmanagerSpec.image.tag .Values.alertmanager.alertmanagerSpec.image.sha }}
image: "{{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }}@sha256:{{ .Values.alertmanager.alertmanagerSpec.image.sha }}"
{{- else if .Values.alertmanager.alertmanagerSpec.image.sha }}
image: "{{ .Values.alertmanager.alertmanagerSpec.image.repository }}@sha256:{{ .Values.alertmanager.alertmanagerSpec.image.sha }}"
{{- else if .Values.alertmanager.alertmanagerSpec.image.tag }}
image: "{{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }}"
{{- else }}
image: "{{ .Values.alertmanager.alertmanagerSpec.image.repository }}"
{{- end }}
version: {{ .Values.alertmanager.alertmanagerSpec.image.tag }}
{{- if .Values.alertmanager.alertmanagerSpec.image.sha }}
sha: {{ .Values.alertmanager.alertmanagerSpec.image.sha }}
@ -91,7 +99,7 @@ spec:
- topologyKey: {{ .Values.alertmanager.alertmanagerSpec.podAntiAffinityTopologyKey }}
labelSelector:
matchExpressions:
- {key: app, operator: In, values: [alertmanager]}
- {key: app.kubernetes.io/name, operator: In, values: [alertmanager]}
- {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]}
{{- else if eq .Values.alertmanager.alertmanagerSpec.podAntiAffinity "soft" }}
podAntiAffinity:
@ -101,7 +109,7 @@ spec:
topologyKey: {{ .Values.alertmanager.alertmanagerSpec.podAntiAffinityTopologyKey }}
labelSelector:
matchExpressions:
- {key: app, operator: In, values: [alertmanager]}
- {key: app.kubernetes.io/name, operator: In, values: [alertmanager]}
- {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]}
{{- end }}
{{- if .Values.alertmanager.alertmanagerSpec.tolerations }}

View File

@ -16,6 +16,6 @@ spec:
{{- end }}
selector:
matchLabels:
app: alertmanager
app.kubernetes.io/name: alertmanager
alertmanager: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager
{{- end }}

View File

@ -12,12 +12,6 @@ metadata:
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
spec:
privileged: false
# Required to prevent escalations to root.
# allowPrivilegeEscalation: false
# This is redundant with non-root + disallow privilege escalation,
# but we can provide it for defense in depth.
#requiredDropCapabilities:
# - ALL
# Allow core volume types.
volumes:
- 'configMap'
@ -38,13 +32,13 @@ spec:
supplementalGroups:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
fsGroup:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
readOnlyRootFilesystem: false

View File

@ -12,9 +12,11 @@ spec:
clusterIP: None
ports:
- name: http-metrics
port: {{ .Values.kubeControllerManager.service.port }}
{{- $kubeControllerManagerDefaultInsecurePort := 10252 }}
{{- $kubeControllerManagerDefaultSecurePort := 10257 }}
port: {{ include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . $kubeControllerManagerDefaultInsecurePort $kubeControllerManagerDefaultSecurePort .Values.kubeControllerManager.service.port) }}
protocol: TCP
targetPort: {{ .Values.kubeControllerManager.service.targetPort }}
targetPort: {{ include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . $kubeControllerManagerDefaultInsecurePort $kubeControllerManagerDefaultSecurePort .Values.kubeControllerManager.service.targetPort) }}
{{- if .Values.kubeControllerManager.endpoints }}{{- else }}
selector:
{{- if .Values.kubeControllerManager.service.selector }}

View File

@ -25,12 +25,12 @@ spec:
{{- if .Values.kubeControllerManager.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeControllerManager.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.kubeControllerManager.serviceMonitor.https }}
{{- if eq (include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . false true .Values.kubeControllerManager.serviceMonitor.https )) "true" }}
scheme: https
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
{{- if .Values.kubeControllerManager.serviceMonitor.insecureSkipVerify }}
insecureSkipVerify: {{ .Values.kubeControllerManager.serviceMonitor.insecureSkipVerify }}
{{- if eq (include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . nil true .Values.kubeControllerManager.serviceMonitor.insecureSkipVerify)) "true" }}
insecureSkipVerify: true
{{- end }}
{{- if .Values.kubeControllerManager.serviceMonitor.serverName }}
serverName: {{ .Values.kubeControllerManager.serviceMonitor.serverName }}

View File

@ -12,9 +12,11 @@ spec:
clusterIP: None
ports:
- name: http-metrics
port: {{ .Values.kubeScheduler.service.port}}
{{- $kubeSchedulerDefaultInsecurePort := 10251 }}
{{- $kubeSchedulerDefaultSecurePort := 10259 }}
port: {{ include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . $kubeSchedulerDefaultInsecurePort $kubeSchedulerDefaultSecurePort .Values.kubeScheduler.service.port) }}
protocol: TCP
targetPort: {{ .Values.kubeScheduler.service.targetPort}}
targetPort: {{ include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . $kubeSchedulerDefaultInsecurePort $kubeSchedulerDefaultSecurePort .Values.kubeScheduler.service.targetPort) }}
{{- if .Values.kubeScheduler.endpoints }}{{- else }}
selector:
{{- if .Values.kubeScheduler.service.selector }}

View File

@ -25,12 +25,12 @@ spec:
{{- if .Values.kubeScheduler.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeScheduler.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.kubeScheduler.serviceMonitor.https }}
{{- if eq (include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . false true .Values.kubeScheduler.serviceMonitor.https )) "true" }}
scheme: https
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
{{- if .Values.kubeScheduler.serviceMonitor.insecureSkipVerify }}
insecureSkipVerify: {{ .Values.kubeScheduler.serviceMonitor.insecureSkipVerify }}
{{- if eq (include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . nil true .Values.kubeScheduler.serviceMonitor.insecureSkipVerify)) "true" }}
insecureSkipVerify: true
{{- end }}
{{- if .Values.kubeScheduler.serviceMonitor.serverName }}
serverName: {{ .Values.kubeScheduler.serviceMonitor.serverName }}

View File

@ -1,63 +0,0 @@
{{- if .Values.kubeStateMetrics.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-kube-state-metrics
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}-kube-state-metrics
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
spec:
jobLabel: app.kubernetes.io/name
endpoints:
- port: http
{{- if .Values.kubeStateMetrics.serviceMonitor.interval }}
interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubeStateMetrics.serviceMonitor.scrapeTimeout }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeStateMetrics.serviceMonitor.proxyUrl}}
{{- end }}
honorLabels: {{ .Values.kubeStateMetrics.serviceMonitor.honorLabels }}
{{- if .Values.kubeStateMetrics.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.metricRelabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }}
relabelings:
{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.selfMonitor.enabled }}
- port: metrics
{{- if .Values.kubeStateMetrics.serviceMonitor.interval }}
interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubeStateMetrics.serviceMonitor.proxyUrl}}
{{- end }}
honorLabels: {{ .Values.kubeStateMetrics.serviceMonitor.honorLabels }}
{{- if .Values.kubeStateMetrics.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.metricRelabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }}
relabelings:
{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}
{{- if ne (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack-kube-state-metrics.namespace" .) }}
namespaceSelector:
matchNames:
- {{ printf "%s" (include "kube-prometheus-stack-kube-state-metrics.namespace" .) | quote }}
{{- end }}
selector:
matchLabels:
{{- if .Values.kubeStateMetrics.serviceMonitor.selectorOverride }}
{{ toYaml .Values.kubeStateMetrics.serviceMonitor.selectorOverride | indent 6 }}
{{ else }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/instance: "{{ $.Release.Name }}"
{{- end }}
{{- end }}

View File

@ -1,40 +0,0 @@
{{- if .Values.nodeExporter.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-node-exporter
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}-node-exporter
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
spec:
jobLabel: {{ .Values.nodeExporter.jobLabel }}
selector:
matchLabels:
app: prometheus-node-exporter
release: {{ $.Release.Name }}
{{- if ne (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack-prometheus-node-exporter.namespace" .) }}
namespaceSelector:
matchNames:
- {{ printf "%s" (include "kube-prometheus-stack-prometheus-node-exporter.namespace" .) | quote }}
{{- end }}
endpoints:
- port: {{ index .Values "prometheus-node-exporter" "service" "portName" }}
{{- if .Values.nodeExporter.serviceMonitor.interval }}
interval: {{ .Values.nodeExporter.serviceMonitor.interval }}
{{- end }}
{{- if .Values.nodeExporter.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.nodeExporter.serviceMonitor.proxyUrl}}
{{- end }}
{{- if .Values.nodeExporter.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.nodeExporter.serviceMonitor.scrapeTimeout }}
{{- end }}
{{- if .Values.nodeExporter.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.nodeExporter.serviceMonitor.metricRelabelings | indent 4) . }}
{{- end }}
{{- if .Values.nodeExporter.serviceMonitor.relabelings }}
relabelings:
{{ tpl (toYaml .Values.nodeExporter.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}

View File

@ -14,12 +14,6 @@ metadata:
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
spec:
privileged: false
# Required to prevent escalations to root.
# allowPrivilegeEscalation: false
# This is redundant with non-root + disallow privilege escalation,
# but we can provide it for defense in depth.
#requiredDropCapabilities:
# - ALL
# Allow core volume types.
volumes:
- 'configMap'
@ -40,13 +34,13 @@ spec:
supplementalGroups:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
fsGroup:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
readOnlyRootFilesystem: false

View File

@ -18,7 +18,7 @@ metadata:
namespace: {{ template "kube-prometheus-stack.namespace" . }}
spec:
secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert
duration: 43800h0m0s # 5y
duration: {{ .Values.prometheusOperator.admissionWebhooks.certManager.rootCert.duration | default "43800h0m0s" | quote }}
issuerRef:
name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer
commonName: "ca.webhook.kube-prometheus-stack"
@ -35,7 +35,7 @@ spec:
secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert
{{- end }}
---
# generate a serving certificate for the apiservices to use
# generate a server certificate for the apiservices to use
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
@ -43,7 +43,7 @@ metadata:
namespace: {{ template "kube-prometheus-stack.namespace" . }}
spec:
secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission
duration: 8760h0m0s # 1y
duration: {{ .Values.prometheusOperator.admissionWebhooks.certManager.admissionCert.duration | default "8760h0m0s" | quote }}
issuerRef:
{{- if .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef }}
{{- toYaml .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef | nindent 4 }}

View File

@ -53,7 +53,7 @@ spec:
- --deny-namespaces={{ .Values.prometheusOperator.denyNamespaces | join "," }}
{{- end }}
{{- with $.Values.prometheusOperator.namespaces }}
{{ $ns := .additional }}
{{ $ns := default (list nil) .additional }}
{{- if .releaseNamespace }}
{{- $ns = append $ns $namespace }}
{{- end }}

View File

@ -12,12 +12,6 @@ metadata:
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
spec:
privileged: false
# Required to prevent escalations to root.
# allowPrivilegeEscalation: false
# This is redundant with non-root + disallow privilege escalation,
# but we can provide it for defense in depth.
#requiredDropCapabilities:
# - ALL
# Allow core volume types.
volumes:
- 'configMap'
@ -38,13 +32,13 @@ spec:
supplementalGroups:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
fsGroup:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
readOnlyRootFilesystem: false

View File

@ -5,22 +5,22 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet
{{- define "rules.names" }}
rules:
- "alertmanager.rules"
- "config-reloaders"
- "etcd"
- "general.rules"
- "k8s.rules"
- "kube-apiserver.rules"
- "kube-apiserver-availability.rules"
- "kube-apiserver-error"
- "kube-apiserver-slos"
- "kube-prometheus-general.rules"
- "kube-prometheus-node-alerting.rules"
- "kube-prometheus-node-recording.rules"
- "kube-scheduler.rules"
- "kube-state-metrics"
- "kubelet.rules"
- "kubernetes-absent"
- "kubernetes-apps"
- "kubernetes-resources"
- "kubernetes-storage"
- "kubernetes-system"
- "kubernetes-system-kube-proxy"
- "kubernetes-system-apiserver"
- "kubernetes-system-kubelet"
- "kubernetes-system-controller-manager"
@ -29,10 +29,6 @@ rules:
- "node-exporter"
- "node.rules"
- "node-network"
- "node-time"
- "prometheus-operator"
- "prometheus.rules"
- "prometheus"
- "kubernetes-apps"
- "etcd"
{{- end }}

View File

@ -202,7 +202,6 @@ spec:
{{- else if .Values.prometheus.prometheusSpec.ruleSelectorNilUsesHelmValues }}
ruleSelector:
matchLabels:
app: {{ template "kube-prometheus-stack.name" . }}
release: {{ $.Release.Name | quote }}
{{ else }}
ruleSelector: {}

View File

@ -12,12 +12,6 @@ metadata:
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
spec:
privileged: false
# Required to prevent escalations to root.
# allowPrivilegeEscalation: false
# This is redundant with non-root + disallow privilege escalation,
# but we can provide it for defense in depth.
#requiredDropCapabilities:
# - ALL
# Allow core volume types.
volumes:
- 'configMap'
@ -41,13 +35,13 @@ spec:
supplementalGroups:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
fsGroup:
rule: 'MustRunAs'
ranges:
# Forbid adding the root group.
# Allow adding the root group.
- min: 0
max: 65535
readOnlyRootFilesystem: false

View File

@ -50,6 +50,7 @@ spec:
{{- if .Values.prometheus.service.additionalPorts }}
{{ toYaml .Values.prometheus.service.additionalPorts | indent 2 }}
{{- end }}
publishNotReadyAddresses: {{ .Values.prometheus.service.publishNotReadyAddresses }}
selector:
app.kubernetes.io/name: prometheus
prometheus: {{ template "kube-prometheus-stack.fullname" . }}-prometheus

View File

@ -35,17 +35,16 @@ defaultRules:
rules:
alertmanager: true
etcd: true
configReloaders: true
general: true
k8s: true
kubeApiserver: true
kubeApiserverAvailability: true
kubeApiserverError: true
kubeApiserverSlos: true
kubelet: true
kubeProxy: true
kubePrometheusGeneral: true
kubePrometheusNodeAlerting: true
kubePrometheusNodeRecording: true
kubernetesAbsent: true
kubernetesApps: true
kubernetesResources: true
kubernetesStorage: true
@ -54,12 +53,11 @@ defaultRules:
kubeStateMetrics: true
network: true
node: true
nodeExporterAlerting: true
nodeExporterRecording: true
prometheus: true
prometheusOperator: true
time: true
## Runbook url prefix for default rules
runbookUrl: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#
## Reduce app namespace alert scope
appNamespacesTarget: ".*"
@ -71,6 +69,14 @@ defaultRules:
## Additional labels for PrometheusRule alerts
additionalRuleLabels: {}
## Prefix for runbook URLs. Use this to override the first part of the runbookURLs that is common to all rules.
runbookUrl: "https://runbooks.prometheus-operator.dev/runbooks"
## Disabled PrometheusRule alerts
disabled: {}
# KubeAPIDown: true
# NodeRAIDDegraded: true
## Deprecated way to provide custom recording or alerting rules to be deployed into the cluster.
##
# additionalPrometheusRules: []
@ -95,7 +101,7 @@ additionalPrometheusRulesMap: {}
global:
rbac:
create: true
pspEnabled: true
pspEnabled: false
pspAnnotations: {}
## Specify pod annotations
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
@ -361,13 +367,13 @@ alertmanager:
scheme: ""
## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS.
## Of type: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig
## Of type: https://github.com/coreos/prometheus-operator/blob/main/Documentation/api.md#tlsconfig
tlsConfig: {}
bearerTokenFile:
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -375,7 +381,7 @@ alertmanager:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -386,7 +392,7 @@ alertmanager:
# action: replace
## Settings affecting alertmanagerSpec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#alertmanagerspec
##
alertmanagerSpec:
## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
@ -475,7 +481,7 @@ alertmanager:
retention: 120h
## Storage is the definition of how storage will be used by the Alertmanager instances.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/storage.md
##
storage: {}
# volumeClaimTemplate:
@ -644,6 +650,11 @@ grafana:
adminPassword: prom-operator
rbac:
## If true, Grafana PSPs will be created
##
pspEnabled: false
ingress:
## If true, Grafana Ingress will be created
##
@ -759,7 +770,7 @@ grafana:
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -767,7 +778,7 @@ grafana:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -799,7 +810,7 @@ kubeApiServer:
provider: kubernetes
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -807,7 +818,7 @@ kubeApiServer:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels:
@ -855,7 +866,7 @@ kubelet:
resourcePath: "/metrics/resource/v1alpha1"
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
cAdvisorMetricRelabelings: []
# - sourceLabels: [__name__, image]
@ -870,7 +881,7 @@ kubelet:
# action: drop
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
probesMetricRelabelings: []
# - sourceLabels: [__name__, image]
@ -885,7 +896,7 @@ kubelet:
# action: drop
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
## metrics_path is required to match upstream rules and charts
cAdvisorRelabelings:
@ -899,7 +910,7 @@ kubelet:
# action: replace
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
probesRelabelings:
- sourceLabels: [__metrics_path__]
@ -912,7 +923,7 @@ kubelet:
# action: replace
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
resourceRelabelings:
- sourceLabels: [__metrics_path__]
@ -925,7 +936,7 @@ kubelet:
# action: replace
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - sourceLabels: [__name__, image]
@ -940,7 +951,7 @@ kubelet:
# action: drop
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
## metrics_path is required to match upstream rules and charts
relabelings:
@ -969,8 +980,11 @@ kubeControllerManager:
##
service:
enabled: true
port: 10252
targetPort: 10252
## If null or unset, the value is determined dynamically based on target Kubernetes version due to change
## of default port in Kubernetes 1.22.
##
port: null
targetPort: null
# selector:
# component: kube-controller-manager
@ -985,9 +999,10 @@ kubeControllerManager:
proxyUrl: ""
## Enable scraping kube-controller-manager over https.
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
## Requires proper certs (not self-signed) and delegated authentication/authorization checks.
## If null or unset, the value is determined dynamically based on target Kubernetes version.
##
https: false
https: null
# Skip TLS certificate validation when scraping
insecureSkipVerify: null
@ -996,7 +1011,7 @@ kubeControllerManager:
serverName: null
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -1004,7 +1019,7 @@ kubeControllerManager:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1033,7 +1048,7 @@ coreDns:
proxyUrl: ""
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -1041,7 +1056,7 @@ coreDns:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1074,7 +1089,7 @@ kubeDns:
proxyUrl: ""
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -1082,7 +1097,7 @@ kubeDns:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1093,7 +1108,7 @@ kubeDns:
# action: replace
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
dnsmasqMetricRelabelings: []
# - action: keep
@ -1101,7 +1116,7 @@ kubeDns:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
dnsmasqRelabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1159,7 +1174,7 @@ kubeEtcd:
keyFile: ""
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -1167,7 +1182,7 @@ kubeEtcd:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1194,8 +1209,11 @@ kubeScheduler:
##
service:
enabled: true
port: 10251
targetPort: 10251
## If null or unset, the value is determined dynamically based on target Kubernetes version due to change
## of default port in Kubernetes 1.23.
##
port: null
targetPort: null
# selector:
# component: kube-scheduler
@ -1208,9 +1226,10 @@ kubeScheduler:
##
proxyUrl: ""
## Enable scraping kube-scheduler over https.
## Requires proper certs (not self-signed) and delegated authentication/authorization checks
## Requires proper certs (not self-signed) and delegated authentication/authorization checks.
## If null or unset, the value is determined dynamically based on target Kubernetes version.
##
https: false
https: null
## Skip TLS certificate validation when scraping
insecureSkipVerify: null
@ -1219,7 +1238,7 @@ kubeScheduler:
serverName: null
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -1227,7 +1246,7 @@ kubeScheduler:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1273,7 +1292,7 @@ kubeProxy:
https: false
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
@ -1281,7 +1300,7 @@ kubeProxy:
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - action: keep
@ -1293,45 +1312,6 @@ kubeProxy:
##
kubeStateMetrics:
enabled: true
serviceMonitor:
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## Scrape Timeout. If not set, the Prometheus default scrape timeout is used.
##
scrapeTimeout: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## Override serviceMonitor selector
##
selectorOverride: {}
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
# separator: ;
# regex: ^(.*)$
# targetLabel: nodename
# replacement: $1
# action: replace
# Keep labels from scraped data, overriding server-side labels
honorLabels: true
# Enable self metrics configuration for Service Monitor
selfMonitor:
enabled: false
## Configuration for kube-state-metrics subchart
##
@ -1339,43 +1319,37 @@ kube-state-metrics:
namespaceOverride: ""
rbac:
create: true
podSecurityPolicy:
releaseLabel: true
prometheus:
monitor:
enabled: true
## Deploy node exporter as a daemonset to all nodes
##
nodeExporter:
enabled: true
## Use the value configured in prometheus-node-exporter.podLabels
##
jobLabel: jobLabel
serviceMonitor:
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## Scrape Timeout. If not set, the Prometheus default scrape timeout is used.
##
scrapeTimeout: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## How long until a scrape request times out. If not set, the Prometheus default scape timeout is used.
# Keep labels from scraped data, overriding server-side labels
##
scrapeTimeout: ""
honorLabels: true
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - sourceLabels: [__name__]
# separator: ;
# regex: ^node_mountstats_nfs_(event|operations|transport)_.+
# replacement: $1
# action: drop
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1385,6 +1359,14 @@ nodeExporter:
# replacement: $1
# action: replace
selfMonitor:
enabled: false
## Deploy node exporter as a daemonset to all nodes
##
nodeExporter:
enabled: true
## Configuration for prometheus-node-exporter subchart
##
prometheus-node-exporter:
@ -1398,6 +1380,48 @@ prometheus-node-exporter:
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
service:
portName: http-metrics
prometheus:
monitor:
enabled: true
jobLabel: jobLabel
## Scrape interval. If not set, the Prometheus default scrape interval is used.
##
interval: ""
## How long until a scrape request times out. If not set, the Prometheus default scape timeout is used.
##
scrapeTimeout: ""
## proxyUrl: URL of a proxy that should be used for scraping.
##
proxyUrl: ""
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - sourceLabels: [__name__]
# separator: ;
# regex: ^node_mountstats_nfs_(event|operations|transport)_.+
# replacement: $1
# action: drop
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
# separator: ;
# regex: ^(.*)$
# targetLabel: nodename
# replacement: $1
# action: replace
rbac:
## If true, create PSPs for node-exporter
##
pspEnabled: false
## Manages Prometheus and Alertmanager components
##
@ -1453,6 +1477,11 @@ prometheusOperator:
# Use certmanager to generate webhook certs
certManager:
enabled: false
# self-signed root certificate
rootCert:
duration: "" # default to be 5y
admissionCert:
duration: "" # default to be 1y
# issuerRef:
# name: "issuer"
# kind: "ClusterIssuer"
@ -1542,7 +1571,7 @@ prometheusOperator:
# logLevel: error
## If true, the operator will create and maintain a service for scraping kubelets
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/helm/prometheus-operator/README.md
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/helm/prometheus-operator/README.md
##
kubeletService:
enabled: true
@ -1639,7 +1668,7 @@ prometheusOperator:
##
image:
repository: quay.io/prometheus-operator/prometheus-operator
tag: v0.52.0
tag: v0.53.1
sha: ""
pullPolicy: IfNotPresent
@ -1657,7 +1686,7 @@ prometheusOperator:
# image to use for config and rule reloading
image:
repository: quay.io/prometheus-operator/prometheus-config-reloader
tag: v0.52.0
tag: v0.53.1
sha: ""
# resource config for prometheusConfigReloader
@ -1673,7 +1702,7 @@ prometheusOperator:
##
thanosImage:
repository: quay.io/thanos/thanos
tag: v0.23.1
tag: v0.24.0
sha: ""
## Set a Field Selector to filter watched secrets
@ -1741,7 +1770,7 @@ prometheus:
scheme: ""
## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS.
## Of type: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig
## Of type: https://github.com/coreos/prometheus-operator/blob/main/Documentation/api.md#tlsconfig
tlsConfig: {}
bearerTokenFile:
@ -1812,6 +1841,13 @@ prometheus:
##
type: ClusterIP
## Additional port to define in the Service
additionalPorts: []
## Consider that all endpoints are considered "ready" even if the Pods themselves are not
## Ref: https://kubernetes.io/docs/reference/kubernetes-api/service-resources/service-v1/#ServiceSpec
publishNotReadyAddresses: false
sessionAffinity: ""
## Configuration for creating a separate Service for each statefulset Prometheus replica
@ -1995,7 +2031,7 @@ prometheus:
scheme: ""
## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS.
## Of type: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#tlsconfig
## Of type: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#tlsconfig
tlsConfig: {}
bearerTokenFile:
@ -2018,14 +2054,14 @@ prometheus:
# action: replace
## Settings affecting prometheusSpec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#prometheusspec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#prometheusspec
##
prometheusSpec:
## If true, pass --storage.tsdb.max-block-duration=2h to prometheus. This is already done if using Thanos
##
disableCompaction: false
## APIServerConfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#apiserverconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#apiserverconfig
##
apiserverConfig: {}
@ -2054,7 +2090,7 @@ prometheus:
enableAdminAPI: false
## WebTLSConfig defines the TLS parameters for HTTPS
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#webtlsconfig
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#webtlsconfig
web: {}
# EnableFeatures API enables access to Prometheus disabled features.
@ -2066,7 +2102,7 @@ prometheus:
##
image:
repository: quay.io/prometheus/prometheus
tag: v2.31.1
tag: v2.32.1
sha: ""
## Tolerations for use with node taints
@ -2090,7 +2126,7 @@ prometheus:
# app: prometheus
## Alertmanagers to which alerts will be sent
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#alertmanagerendpoints
##
## Default configuration will connect to the alertmanager deployed as part of this release
##
@ -2146,13 +2182,13 @@ prometheus:
configMaps: []
## QuerySpec defines the query command line flags when starting Prometheus.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#queryspec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#queryspec
##
query: {}
## Namespaces to be selected for PrometheusRules discovery.
## If nil, select own namespace. Namespaces to be selected for ServiceMonitor discovery.
## See https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#namespaceselector for usage
## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#namespaceselector for usage
##
ruleNamespaceSelector: {}
@ -2220,7 +2256,7 @@ prometheus:
# prometheus: somelabel
## Namespaces to be selected for PodMonitor discovery.
## See https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#namespaceselector for usage
## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#namespaceselector for usage
##
podMonitorNamespaceSelector: {}
@ -2240,7 +2276,7 @@ prometheus:
# prometheus: somelabel
## Namespaces to be selected for Probe discovery.
## See https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#namespaceselector for usage
## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#namespaceselector for usage
##
probeNamespaceSelector: {}
@ -2321,14 +2357,14 @@ prometheus:
# - e2e-az2
## The remote_read spec configuration for Prometheus.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#remotereadspec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#remotereadspec
remoteRead: []
# - url: http://remote1/read
## additionalRemoteRead is appended to remoteRead
additionalRemoteRead: []
## The remote_write spec configuration for Prometheus.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#remotewritespec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#remotewritespec
remoteWrite: []
# - url: http://remote1/push
## additionalRemoteWrite is appended to remoteWrite
@ -2344,7 +2380,7 @@ prometheus:
# memory: 400Mi
## Prometheus StorageSpec for persistent data
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/storage.md
##
storageSpec: {}
## Using PersistentVolumeClaim
@ -2460,7 +2496,7 @@ prometheus:
## SecurityContext holds pod-level security attributes and common container settings.
## This defaults to non root user with uid 1000 and gid 2000.
## https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md
## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md
##
securityContext:
runAsGroup: 2000
@ -2475,7 +2511,7 @@ prometheus:
## Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment.
## This section is experimental, it may change significantly without deprecation notice in any release.
## This is experimental and may change significantly without backward compatibility in any release.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#thanosspec
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#thanosspec
##
thanos: {}
# secretProviderClass:
@ -2693,6 +2729,6 @@ prometheus:
# matchNames: []
## Endpoints of the selected pods to be monitored
## https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#podmetricsendpoint
## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmetricsendpoint
##
# podMetricsEndpoints: []

View File

@ -1,5 +1,5 @@
apiVersion: v1
appVersion: 1.4.1
appVersion: 1.4.2
description: A Helm chart for prometheus pushgateway
home: https://github.com/prometheus/pushgateway
keywords:
@ -13,4 +13,4 @@ maintainers:
name: prometheus-pushgateway
sources:
- https://github.com/prometheus/pushgateway
version: 1.13.0
version: 1.14.0

View File

@ -10,7 +10,7 @@ fullnameOverride: ""
image:
repository: prom/pushgateway
tag: v1.4.1
tag: v1.4.2
pullPolicy: IfNotPresent
# Optional pod imagePullSecrets

View File

@ -9,7 +9,8 @@ which jb > /dev/null || { echo "Required jb ( json-bundler ) not found!"; exit 1
if [ -r jsonnetfile.lock.json ]; then
jb update
else
jb install github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus@main
#jb install github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus@main
jb install github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus@release-0.9
fi
rm -rf dashboards && mkdir -p dashboards

View File

@ -56,11 +56,7 @@
},
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -132,14 +128,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -241,11 +236,7 @@
},
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -316,14 +307,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -416,14 +406,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -499,14 +488,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -606,11 +594,7 @@
},
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -681,14 +665,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -781,14 +764,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -864,14 +846,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -959,14 +940,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
@ -988,7 +968,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@ -1041,14 +1021,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
@ -1070,7 +1049,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@ -1123,7 +1102,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -1152,7 +1130,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))",
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@ -1218,14 +1196,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -1300,14 +1277,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -1329,7 +1305,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])",
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1382,14 +1358,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -1477,7 +1452,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1495,7 +1470,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(up{job=\"apiserver\"}, cluster)",
"query": "label_values(apiserver_request_total, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1515,7 +1490,7 @@
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -1619,7 +1619,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -36,11 +36,7 @@
},
"gridPos": { },
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -110,7 +106,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -139,7 +134,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -205,7 +200,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -234,7 +228,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -300,7 +294,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -329,7 +322,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))",
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -395,14 +388,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -424,28 +416,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -498,14 +490,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -527,7 +518,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -593,7 +584,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -622,7 +612,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -688,14 +678,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -770,14 +759,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -799,7 +787,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -852,14 +840,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -947,7 +934,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -24,12 +24,10 @@
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -49,7 +47,7 @@
"steppedLine": false,
"targets": [
{
"expr": "1 - sum(avg by (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", cluster=\"$cluster\"}[$__rate_interval])))",
"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -101,14 +99,11 @@
"fill": 1,
"format": "percentunit",
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -128,7 +123,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -180,14 +175,11 @@
"fill": 1,
"format": "percentunit",
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -207,7 +199,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -259,14 +251,11 @@
"fill": 1,
"format": "percentunit",
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -286,7 +275,7 @@
"steppedLine": false,
"targets": [
{
"expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})",
"expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -338,14 +327,11 @@
"fill": 1,
"format": "percentunit",
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -365,7 +351,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -417,14 +403,11 @@
"fill": 1,
"format": "percentunit",
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -444,7 +427,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -507,14 +490,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -598,14 +578,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -764,7 +741,7 @@
],
"targets": [
{
"expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -884,14 +861,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -911,7 +885,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -975,14 +949,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1141,7 +1112,7 @@
],
"targets": [
{
"expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1159,7 +1130,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1177,7 +1148,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1195,7 +1166,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1263,12 +1234,10 @@
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1412,7 +1381,7 @@
],
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1421,7 +1390,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1430,7 +1399,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1439,7 +1408,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1448,7 +1417,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1457,7 +1426,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1523,14 +1492,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1550,7 +1516,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1602,14 +1568,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1629,7 +1592,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1693,14 +1656,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1720,7 +1680,7 @@
"steppedLine": false,
"targets": [
{
"expr": "avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1772,14 +1732,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1799,7 +1756,7 @@
"steppedLine": false,
"targets": [
{
"expr": "avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1863,14 +1820,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1890,7 +1844,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1942,14 +1896,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1969,7 +1920,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2033,14 +1984,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2060,7 +2008,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2112,14 +2060,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 19,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2139,7 +2084,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2204,14 +2149,11 @@
"decimals": -1,
"fill": 10,
"id": 20,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2231,7 +2173,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])))",
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2283,14 +2225,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 21,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2310,7 +2249,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2374,14 +2313,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 22,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2529,7 +2465,7 @@
],
"targets": [
{
"expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2538,7 +2474,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "sum by(namespace) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2547,7 +2483,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2556,7 +2492,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2565,7 +2501,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2574,7 +2510,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,

View File

@ -22,14 +22,11 @@
"fill": 1,
"format": "percentunit",
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -49,7 +46,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -101,14 +98,11 @@
"fill": 1,
"format": "percentunit",
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -128,7 +122,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -180,14 +174,11 @@
"fill": 1,
"format": "percentunit",
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -207,7 +198,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -259,14 +250,11 @@
"fill": 1,
"format": "percentunit",
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -286,7 +274,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -349,14 +337,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -479,14 +464,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -717,14 +699,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -767,7 +746,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -847,14 +826,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1028,7 +1004,7 @@
],
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1046,7 +1022,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1064,7 +1040,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1073,7 +1049,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1082,7 +1058,7 @@
"step": 10
},
{
"expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1091,7 +1067,7 @@
"step": 10
},
{
"expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1159,12 +1135,10 @@
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1308,7 +1282,7 @@
],
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1317,7 +1291,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1326,7 +1300,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1335,7 +1309,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1344,7 +1318,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1353,7 +1327,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1419,14 +1393,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1498,14 +1469,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1589,14 +1557,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1668,14 +1633,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1759,14 +1721,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1838,14 +1797,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1930,14 +1886,11 @@
"decimals": -1,
"fill": 10,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1957,7 +1910,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))",
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -2009,14 +1962,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2036,7 +1986,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -2100,14 +2050,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2255,7 +2202,7 @@
],
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2264,7 +2211,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2273,7 +2220,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2282,7 +2229,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2291,7 +2238,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2300,7 +2247,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2389,7 +2336,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"query": "label_values(kube_pod_info, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -2412,7 +2359,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -21,14 +21,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -41,32 +38,12 @@
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
"hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
"stack": false
}
],
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series",
@ -132,14 +109,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -370,14 +344,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -390,32 +361,12 @@
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
"hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
"stack": false
}
],
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"format": "time_series",
@ -481,14 +432,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -814,7 +762,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"query": "label_values(kube_pod_info, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -837,7 +785,7 @@
"multi": true,
"name": "node",
"options": [ ],
"query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)",
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -21,14 +21,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -75,7 +72,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
@ -83,7 +80,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
@ -147,14 +144,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -174,7 +168,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)",
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -247,14 +241,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -485,14 +476,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -533,7 +521,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -541,7 +529,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
@ -549,7 +537,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
@ -613,14 +601,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -794,7 +779,7 @@
],
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -812,7 +797,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -830,7 +815,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -839,7 +824,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -848,7 +833,7 @@
"step": 10
},
{
"expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -857,7 +842,7 @@
"step": 10
},
{
"expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -925,12 +910,10 @@
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -950,7 +933,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1004,12 +987,10 @@
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1029,7 +1010,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1095,12 +1076,10 @@
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1120,7 +1099,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1174,12 +1153,10 @@
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1199,7 +1176,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1265,12 +1242,10 @@
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1290,7 +1265,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1344,12 +1319,10 @@
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1369,7 +1342,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1434,14 +1407,11 @@
"decimals": -1,
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1461,7 +1431,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
@ -1469,7 +1439,7 @@
"step": 10
},
{
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
@ -1521,14 +1491,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1548,7 +1515,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
@ -1556,7 +1523,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
@ -1621,14 +1588,11 @@
"decimals": -1,
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1648,7 +1612,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -1700,14 +1664,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1727,7 +1688,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -1791,14 +1752,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1946,7 +1904,7 @@
],
"targets": [
{
"expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1955,7 +1913,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"expr": "sum by(container) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1964,7 +1922,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1973,7 +1931,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1982,7 +1940,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1991,7 +1949,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2080,7 +2038,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"query": "label_values(kube_pod_info, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -2103,7 +2061,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -2126,7 +2084,7 @@
"multi": false,
"name": "pod",
"options": [ ],
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
"query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -21,14 +21,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -112,14 +109,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -257,7 +251,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -266,7 +260,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -275,7 +269,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -284,7 +278,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -350,14 +344,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -441,14 +432,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -586,7 +574,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -595,7 +583,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -604,7 +592,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -613,7 +601,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -681,12 +669,10 @@
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -830,7 +816,7 @@
],
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -839,7 +825,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -848,7 +834,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -857,7 +843,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -866,7 +852,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -875,7 +861,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -941,14 +927,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -968,7 +951,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1020,14 +1003,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1047,7 +1027,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1111,14 +1091,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1138,7 +1115,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1190,14 +1167,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1217,7 +1191,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1281,14 +1255,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1308,7 +1279,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1360,14 +1331,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1387,7 +1355,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1451,14 +1419,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1478,7 +1443,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1530,14 +1495,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1557,7 +1519,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1644,7 +1606,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"query": "label_values(kube_pod_info, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1667,30 +1629,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "type",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)",
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1713,7 +1652,30 @@
"multi": false,
"name": "workload",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)",
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "type",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -21,14 +21,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -151,14 +148,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -335,7 +329,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -344,7 +338,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -353,7 +347,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -362,7 +356,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -428,14 +422,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -478,7 +469,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}} - {{workload_type}}",
@ -558,14 +549,11 @@
"datasource": "$datasource",
"fill": 1,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -733,7 +721,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -742,7 +730,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -751,7 +739,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -760,7 +748,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -769,7 +757,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -837,12 +825,10 @@
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1001,7 +987,7 @@
],
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1010,7 +996,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1019,7 +1005,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1028,7 +1014,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1037,7 +1023,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1046,7 +1032,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1112,14 +1098,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1139,7 +1122,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1191,14 +1174,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1218,7 +1198,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1282,14 +1262,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1309,7 +1286,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1361,14 +1338,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1388,7 +1362,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1452,14 +1426,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1479,7 +1450,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1531,14 +1502,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1558,7 +1526,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1622,14 +1590,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1649,7 +1614,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1701,14 +1666,11 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1728,7 +1690,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1815,30 +1777,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"query": "label_values(kube_pod_info, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1875,6 +1814,29 @@
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},

View File

@ -154,7 +154,7 @@
"refId": "A"
}
],
"title": "Running Containers",
"title": "Running Container",
"transparent": false,
"type": "stat"
},
@ -294,7 +294,7 @@
"pluginVersion": "7",
"targets": [
{
"expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval]))",
"expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -347,7 +347,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)",
"expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -432,7 +432,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -517,7 +517,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -602,14 +602,14 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} pod",
"refId": "A"
},
{
"expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} worker",
@ -694,14 +694,14 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} pod",
"refId": "A"
},
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} worker",
@ -788,7 +788,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -875,7 +875,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -962,7 +962,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))",
"expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -1047,7 +1047,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{operation_type}}",
@ -1132,7 +1132,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -1218,7 +1218,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1303,7 +1303,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1388,7 +1388,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1473,28 +1473,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -1579,7 +1579,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{verb}} {{url}}",
@ -1749,7 +1749,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1893,7 +1893,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1927,11 +1927,11 @@
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "Data Source",
"label": null,
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(up{job=\"kubelet\",cluster=\"$cluster\"}, instance)",
"query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -1137,7 +1137,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -1349,7 +1349,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -26,14 +26,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -125,11 +124,7 @@
},
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -212,14 +207,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -311,11 +305,7 @@
},
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -398,7 +388,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -416,7 +406,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}, cluster)",
"query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -903,7 +903,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -36,11 +36,7 @@
},
"gridPos": { },
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -110,14 +106,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -139,7 +134,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "rate",
@ -192,7 +187,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -221,7 +215,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -287,14 +281,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -316,7 +309,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "rate",
@ -369,7 +362,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -398,7 +390,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -464,14 +456,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -493,28 +484,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -567,14 +558,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -596,7 +586,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -662,7 +652,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -691,7 +680,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -757,14 +746,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -839,14 +827,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -868,7 +855,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -921,14 +908,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -1016,7 +1002,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1034,7 +1020,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(up{job=\"kube-proxy\"}, cluster)",
"query": "label_values(kube_pod_info, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1054,7 +1040,7 @@
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)",
"query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -36,11 +36,7 @@
},
"gridPos": { },
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -110,7 +106,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -139,28 +134,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A"
},
{
"expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B"
},
{
"expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C"
},
{
"expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} volume",
@ -213,7 +208,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -242,28 +236,28 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} volume",
@ -329,14 +323,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -358,28 +351,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -432,14 +425,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -461,7 +453,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -527,7 +519,6 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -556,7 +547,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -622,14 +613,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -704,14 +694,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -733,7 +722,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -786,14 +775,13 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
@ -881,7 +869,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -919,7 +907,7 @@
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)",
"query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -89,7 +89,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -184,7 +184,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -290,7 +290,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -385,7 +385,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -506,7 +506,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -597,7 +597,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -699,7 +699,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -790,7 +790,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -901,7 +901,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -992,7 +992,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -1061,7 +1061,7 @@
"value": "default"
},
"hide": 0,
"label": "Data Source",
"label": null,
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1079,7 +1079,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)",
"query": "label_values(kube_pod_info, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
@ -1099,14 +1099,14 @@
"value": "kube-system"
},
"datasource": "$datasource",
"definition": "label_values(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\"}, namespace)",
"definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\"}, namespace)",
"query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,

View File

@ -8,7 +8,7 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "main"
"version": "release-0.9"
}
],
"legacyImports": true

View File

@ -8,8 +8,8 @@
"subdir": "grafana"
}
},
"version": "199e363523104ff8b3a12483a4e3eca86372b078",
"sum": "/jDHzVAjHB4AOLkJHw1GyATX5ogZ1iMdcJXZAgaG3+g="
"version": "90f38916f1f8a310a715d18e36f787f84df4ddf5",
"sum": "0kZ1pnuIirDtbg6F9at5+NQOwKNONIGEPq0eECzvRkI="
},
{
"source": {
@ -18,8 +18,8 @@
"subdir": "contrib/mixin"
}
},
"version": "29292aa7bdafaf65cb5e054591fe0ff07b36f5ee",
"sum": "cdKL5kPYfpWSpTCu4qctmh+gWQqL+4YWom6rw9qLYJU="
"version": "a62444690780fd1eb72615b0cd6d804ea54cc771",
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
},
{
"source": {
@ -38,7 +38,7 @@
"subdir": "grafana-builder"
}
},
"version": "e2cfc18b7be54acc6978b7dab2c61ec5e37385a8",
"version": "5a128df878434da37969b811e99bb9cd0a3779e3",
"sum": "0KkygBQd/AFzUvVzezE4qF/uDYgrwUXVpZfINBti0oc="
},
{
@ -48,8 +48,8 @@
"subdir": ""
}
},
"version": "9821d07e94e9a9916575a234fb699ae3331fa939",
"sum": "xubNXyvDwUw9GZzi9BRb6ob3bYzfoMr5F5zCVn2d7ag="
"version": "fb9d8ed4bc4a3d6efac525f72e8a0d2c583a0fe2",
"sum": "xjKkdp+5fkekCNBUIgZCHTRmVdUEmQNFKslrL2Ho8gs="
},
{
"source": {
@ -58,7 +58,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "9821d07e94e9a9916575a234fb699ae3331fa939",
"version": "c76b9378b86d28bd617d94a57c72b4770efed510",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
},
{
@ -68,8 +68,8 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "b761b5382bdd85d7af915516f48cba1c46859c1d",
"sum": "U1wzIpTAtOvC1yj43Y8PfvT0JfvnAcMfNH12Wi+ab0Y="
"version": "f170cc73f11c1580d7f38af746be0f2fa79c6a1e",
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
},
{
"source": {
@ -78,7 +78,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "b761b5382bdd85d7af915516f48cba1c46859c1d",
"version": "f170cc73f11c1580d7f38af746be0f2fa79c6a1e",
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
},
{
@ -88,8 +88,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "9b532dac0c8b00e984d2f77b2f2f0ef0dae6e751",
"sum": "HCUXyGj9sJWtBV10TBxC+Q0RFz5VFiMM/y5u6/3t4tw="
"version": "c8db3c85db6e13b94726b7c5d630a6ebe7038cf9",
"sum": "7DfVEg2kDzIQPgxTThnjQ4S2ZksrF0hkNLysXP6Wv24="
},
{
"source": {
@ -98,8 +98,8 @@
"subdir": "jsonnet/mixin"
}
},
"version": "2200303e6a9629b3ccc0e9d466438dc079572aa3",
"sum": "qZ4WgiweaE6eeKtFK60QUjLO8sf2L9Q8fgafWvDcyfY=",
"version": "83fe36566f4e0894eb5ffcd2638a0f039a17bdeb",
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
"name": "prometheus-operator-mixin"
},
{
@ -109,8 +109,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "2200303e6a9629b3ccc0e9d466438dc079572aa3",
"sum": "9R1mw4Tz0/1V1QWkJMzqE4+iXXONEfYVikW8Mj5AOcA="
"version": "83fe36566f4e0894eb5ffcd2638a0f039a17bdeb",
"sum": "J1G++A8hrtr3+OZQMmcNeb1w/C30bXqqwpwHL/Xhsd4="
},
{
"source": {
@ -119,7 +119,7 @@
"subdir": "doc/alertmanager-mixin"
}
},
"version": "e2a10119aaf7777fa523d216e05897c5b719134c",
"version": "b408b522bc653d014e53035e59fa394cc1edd762",
"sum": "pep+dHzfIjh2SU5pEkwilMCAT/NoL6YYflV4x8cr7vU=",
"name": "alertmanager"
},
@ -130,8 +130,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "2028460438d502be6137ac0738068538cd8fafe1",
"sum": "MlWDAKGZ+JArozRKdKEvewHeWn8j2DNBzesJfLVd0dk="
"version": "832909dd257eb368cf83363ffcae3ab84cb4bcb1",
"sum": "MmxGhE2PJ1a52mk2x7vDpMT2at4Jglbud/rK74CB5i0="
},
{
"source": {
@ -140,8 +140,8 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "a14960eae102fb6e83817f7faf15695110ff433b",
"sum": "ZjQoYhvgKwJNkg+h+m9lW3SYjnjv5Yx5btEipLhru88=",
"version": "751ca03faddc9c64089c41d0da370a3a0b477742",
"sum": "AS8WYFi/z10BZSF6DFkKBscjB32XDMM7iIso7CO/FyI=",
"name": "prometheus"
},
{
@ -151,8 +151,8 @@
"subdir": "mixin"
}
},
"version": "4ed5206d1893975f6c45045d680f8c8a7dd2a25e",
"sum": "1Y1cPIeoPg2nCAEhKPCt8bAGuwuOP2eZ3kVF432mlMA=",
"version": "ff363498fc95cfe17de894d7237bcf38bdd0bc36",
"sum": "cajthvLKDjYgYHCKQU2g/pTMRkxcbuJEvTnCyJOihl8=",
"name": "thanos-mixin"
}
],

View File

@ -4,10 +4,9 @@
"metadata": {
"labels": {
"app.kubernetes.io/component": "alert-router",
"app.kubernetes.io/instance": "main",
"app.kubernetes.io/name": "alertmanager",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.23.0",
"app.kubernetes.io/version": "0.22.2",
"prometheus": "k8s",
"role": "alert-rules"
},

View File

@ -64,10 +64,10 @@
{
"alert": "etcdGRPCRequestsSlow",
"annotations": {
"description": "etcd cluster \"{{ $labels.job }}\": 99th percentile of gRPC requests is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method }} method.",
"description": "etcd cluster \"{{ $labels.job }}\": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.",
"summary": "etcd grpc requests are slow"
},
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\", grpc_method!=\"Defragment\", grpc_type=\"unary\"}[5m])) without(grpc_type))\n> 0.15\n",
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\", grpc_type=\"unary\"}[5m])) without(grpc_type))\n> 0.15\n",
"for": "10m",
"labels": {
"severity": "critical"
@ -112,7 +112,7 @@
{
"alert": "etcdHighFsyncDurations",
"annotations": {
"description": "etcd cluster \"{{ $labels.job }}\": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}."
"message": "etcd cluster \"{{ $labels.job }}\": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}."
},
"expr": "histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\".*etcd.*\"}[5m]))\n> 1\n",
"for": "10m",
@ -135,7 +135,7 @@
{
"alert": "etcdBackendQuotaLowSpace",
"annotations": {
"description": "etcd cluster \"{{ $labels.job }}\": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full."
"message": "etcd cluster \"{{ $labels.job }}\": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full."
},
"expr": "(etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100 > 95\n",
"for": "10m",
@ -146,7 +146,7 @@
{
"alert": "etcdExcessiveDatabaseGrowth",
"annotations": {
"description": "etcd cluster \"{{ $labels.job }}\": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive."
"message": "etcd cluster \"{{ $labels.job }}\": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive."
},
"expr": "increase(((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100)[240m:1m]) > 50\n",
"for": "10m",

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "kube-state-metrics",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.3.0",
"app.kubernetes.io/version": "2.1.1",
"prometheus": "k8s",
"role": "alert-rules"
},

View File

@ -123,7 +123,7 @@
{
"alert": "KubeContainerWaiting",
"annotations": {
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting",
"summary": "Pod container waiting longer than 1 hour"
},
@ -232,7 +232,7 @@
{
"alert": "KubeMemoryOvercommit",
"annotations": {
"description": "Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.",
"description": "Cluster has overcommitted memory resource requests for Pods by {{ $value }} bytes and cannot tolerate node failure.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit",
"summary": "Cluster has overcommitted memory resource requests."
},
@ -249,7 +249,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit",
"summary": "Cluster has overcommitted CPU resource requests."
},
"expr": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"})\n > 1.5\n",
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -262,7 +262,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit",
"summary": "Cluster has overcommitted memory resource requests."
},
"expr": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"})\n > 1.5\n",
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -332,7 +332,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup",
"summary": "PersistentVolume is filling up."
},
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\n",
"for": "1m",
"labels": {
"severity": "critical"
@ -345,7 +345,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup",
"summary": "PersistentVolume is filling up."
},
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\n",
"for": "1h",
"labels": {
"severity": "warning"
@ -468,7 +468,7 @@
{
"alert": "KubeClientCertificateExpiration",
"annotations": {
"description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.",
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration",
"summary": "Client certificate is about to expire."
},
@ -480,7 +480,7 @@
{
"alert": "KubeClientCertificateExpiration",
"annotations": {
"description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.",
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration",
"summary": "Client certificate is about to expire."
},
@ -490,11 +490,11 @@
}
},
{
"alert": "KubeAggregatedAPIErrors",
"alert": "AggregatedAPIErrors",
"annotations": {
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors",
"summary": "Kubernetes aggregated API has reported errors."
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors",
"summary": "An aggregated API has reported errors."
},
"expr": "sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4\n",
"labels": {
@ -502,11 +502,11 @@
}
},
{
"alert": "KubeAggregatedAPIDown",
"alert": "AggregatedAPIDown",
"annotations": {
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown",
"summary": "Kubernetes aggregated API is down."
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown",
"summary": "An aggregated API is down."
},
"expr": "(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85\n",
"for": "5m",
@ -530,9 +530,9 @@
{
"alert": "KubeAPITerminatedRequests",
"annotations": {
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
"description": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests",
"summary": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
"summary": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
},
"expr": "sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) / ( sum(rate(apiserver_request_total{job=\"apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) ) > 0.20\n",
"for": "5m",
@ -581,7 +581,7 @@
"expr": "count by(node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n",
"for": "15m",
"labels": {
"severity": "info"
"severity": "warning"
}
},
{
@ -748,71 +748,53 @@
}
]
},
{
"name": "kubernetes-system-kube-proxy",
"rules": [
{
"alert": "KubeProxyDown",
"annotations": {
"description": "KubeProxy has disappeared from Prometheus target discovery.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown",
"summary": "Target disappeared from Prometheus target discovery."
},
"expr": "absent(up{job=\"kube-proxy\"} == 1)\n",
"for": "15m",
"labels": {
"severity": "critical"
}
}
]
},
{
"name": "kube-apiserver-burnrate.rules",
"rules": [
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate2h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate30m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate3d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate5m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"labels": {
"verb": "read"
},
@ -934,37 +916,21 @@
"record": "code:apiserver_request_total:increase30d"
},
{
"expr": "sum by (cluster, verb, scope) (increase(apiserver_request_duration_seconds_count[1h]))\n",
"record": "cluster_verb_scope:apiserver_request_duration_seconds_count:increase1h"
},
{
"expr": "sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_duration_seconds_count:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d"
},
{
"expr": "sum by (cluster, verb, scope, le) (increase(apiserver_request_duration_seconds_bucket[1h]))\n",
"record": "cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase1h"
},
{
"expr": "sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d"
},
{
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n ) +\n (\n # read too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"LIST|GET\"}[30d]))\n -\n (\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"labels": {
"verb": "all"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"expr": "1 - (\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30d]))\n -\n (\n # too slow\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"labels": {
"verb": "write"
},
@ -1026,7 +992,7 @@
"record": "node_namespace_pod_container:container_memory_swap"
},
{
"expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests"
},
{
@ -1034,7 +1000,7 @@
"record": "namespace_memory:kube_pod_container_resource_requests:sum"
},
{
"expr": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests"
},
{
@ -1042,7 +1008,7 @@
"record": "namespace_cpu:kube_pod_container_resource_requests:sum"
},
{
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits"
},
{
@ -1050,7 +1016,7 @@
"record": "namespace_memory:kube_pod_container_resource_limits:sum"
},
{
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n",
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n",
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits"
},
{

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "node-exporter",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "1.3.1",
"app.kubernetes.io/version": "1.2.2",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -25,7 +25,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup",
"summary": "Filesystem is predicted to run out of space within the next 24 hours."
},
"expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 20\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n",
"expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 40\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n",
"for": "1h",
"labels": {
"severity": "warning"
@ -255,11 +255,11 @@
"name": "node-exporter.rules",
"rules": [
{
"expr": "count without (cpu, mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}\n)\n",
"expr": "count without (cpu) (\n count without (mode) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n )\n)\n",
"record": "instance:node_num_cpu:sum"
},
{
"expr": "1 - avg without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n)\n",
"expr": "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[5m])\n)\n",
"record": "instance:node_cpu_utilisation:rate5m"
},
{
@ -267,7 +267,7 @@
"record": "instance:node_load1_per_cpu:ratio"
},
{
"expr": "1 - (\n (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
"expr": "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
"record": "instance:node_memory_utilisation:ratio"
},
{

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "controller",
"app.kubernetes.io/name": "prometheus-operator",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.52.1",
"app.kubernetes.io/version": "0.49.0",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -110,24 +110,6 @@
}
}
]
},
{
"name": "config-reloaders",
"rules": [
{
"alert": "ConfigReloaderSidecarErrors",
"annotations": {
"description": "Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.\nAs a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors",
"summary": "config-reloader sidecar has not had a successful reload for 10m"
},
"expr": "max_over_time(reloader_last_reload_successful{namespace=~\".+\"}[5m]) == 0\n",
"for": "10m",
"labels": {
"severity": "warning"
}
}
]
}
]
}

View File

@ -4,10 +4,9 @@
"metadata": {
"labels": {
"app.kubernetes.io/component": "prometheus",
"app.kubernetes.io/instance": "k8s",
"app.kubernetes.io/name": "prometheus",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.31.1",
"app.kubernetes.io/version": "2.29.1",
"prometheus": "k8s",
"role": "alert-rules"
},

File diff suppressed because one or more lines are too long

View File

@ -18,9 +18,13 @@ spec:
- source:
notIpBlocks:
{{- toYaml $service.ipBlocks | nindent 8 }}
to:
- operation:
hosts: ["{{ $service.url }}"]
when:
- key: connection.sni
values: ["{{ $service.url }}"]
values:
- '*'
---
{{- end }}
{{- end }}

View File

@ -49,9 +49,9 @@ spec:
severity: warning
- alert: etcdGRPCRequestsSlow
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}} for {{`{{`}} $labels.grpc_method {{`}}`}} method.'
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
summary: etcd grpc requests are slow
expr: 'histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type))
expr: 'histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) without(grpc_type))
> 0.15
@ -95,7 +95,7 @@ spec:
severity: warning
- alert: etcdHighFsyncDurations
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: 'histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 1
@ -118,7 +118,7 @@ spec:
severity: warning
- alert: etcdBackendQuotaLowSpace
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size exceeds the defined quota on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.'
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size exceeds the defined quota on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.'
expr: '(etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100 > 95
'
@ -127,7 +127,7 @@ spec:
severity: critical
- alert: etcdExcessiveDatabaseGrowth
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please check as it might be disruptive.'
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please check as it might be disruptive.'
expr: 'increase(((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100)[240m:1m]) > 50
'

View File

@ -85,7 +85,7 @@ spec:
severity: warning
- alert: KubeContainerWaiting
annotations:
description: pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
expr: 'sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
@ -173,7 +173,7 @@ spec:
severity: warning
- alert: KubeMemoryOvercommit
annotations:
description: Cluster has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure.
description: Cluster has overcommitted memory resource requests for Pods by {{`{{`}} $value {{`}}`}} bytes and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
summary: Cluster has overcommitted memory resource requests.
expr: 'sum(namespace_memory:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
@ -191,7 +191,7 @@ spec:
description: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests.
expr: "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"})\n > 1.5\n"
expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n"
for: 5m
labels:
severity: warning
@ -200,7 +200,7 @@ spec:
description: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests.
expr: "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"})\n > 1.5\n"
expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n"
for: 5m
labels:
severity: warning
@ -247,7 +247,7 @@ spec:
description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\n"
for: 1m
labels:
severity: critical
@ -256,7 +256,7 @@ spec:
description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\n"
for: 1h
labels:
severity: warning
@ -367,7 +367,7 @@ spec:
rules:
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.
description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
@ -377,7 +377,7 @@ spec:
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.
description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
@ -385,21 +385,21 @@ spec:
'
labels:
severity: critical
- alert: KubeAggregatedAPIErrors
- alert: AggregatedAPIErrors
annotations:
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors
summary: Kubernetes aggregated API has reported errors.
description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors
summary: An aggregated API has reported errors.
expr: 'sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4
'
labels:
severity: warning
- alert: KubeAggregatedAPIDown
- alert: AggregatedAPIDown
annotations:
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown
summary: Kubernetes aggregated API is down.
description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown
summary: An aggregated API is down.
expr: '(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
'
@ -419,9 +419,9 @@ spec:
severity: critical
- alert: KubeAPITerminatedRequests
annotations:
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
description: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
summary: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
expr: 'sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
'
@ -460,7 +460,7 @@ spec:
expr: "count by(node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n"
for: 15m
labels:
severity: info
severity: warning
- alert: KubeNodeReadinessFlapping
annotations:
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.
@ -593,46 +593,33 @@ spec:
for: 15m
labels:
severity: critical
- name: kubernetes-system-kube-proxy
rules:
- alert: KubeProxyDown
annotations:
description: KubeProxy has disappeared from Prometheus target discovery.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown
summary: Target disappeared from Prometheus target discovery.
expr: 'absent(up{job="kube-proxy"} == 1)
'
for: 15m
labels:
severity: critical
- name: kube-apiserver-burnrate.rules
rules:
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
labels:
verb: read
record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
labels:
verb: read
record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
labels:
verb: read
record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
labels:
verb: read
record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
labels:
verb: read
record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
labels:
verb: read
record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
labels:
verb: read
record: apiserver_request:burnrate6h
@ -717,32 +704,16 @@ spec:
labels:
verb: write
record: code:apiserver_request_total:increase30d
- expr: 'sum by (cluster, verb, scope) (increase(apiserver_request_duration_seconds_count[1h]))
'
record: cluster_verb_scope:apiserver_request_duration_seconds_count:increase1h
- expr: 'sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_duration_seconds_count:increase1h[30d]) * 24 * 30)
'
record: cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d
- expr: 'sum by (cluster, verb, scope, le) (increase(apiserver_request_duration_seconds_bucket[1h]))
'
record: cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase1h
- expr: 'sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase1h[30d]) * 24 * 30)
'
record: cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"\
})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n ) +\n (\n # read too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"LIST|GET\"}[30d]))\n -\n (\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"}\
\ or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
labels:
verb: all
record: apiserver_request:availability30d
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
- expr: "1 - (\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30d]))\n -\n (\n # too slow\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
labels:
verb: read
record: apiserver_request:availability30d
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
labels:
verb: write
record: apiserver_request:availability30d
@ -786,19 +757,19 @@ spec:
record: node_namespace_pod_container:container_memory_cache
- expr: "container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n"
record: node_namespace_pod_container:container_memory_swap
- expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_memory:kube_pod_container_resource_requests:sum
- expr: "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_requests:sum
- expr: "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_memory:kube_pod_container_resource_limits:sum
- expr: "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n"
- expr: "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n"
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_limits:sum

View File

@ -14,7 +14,7 @@ spec:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 20\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n"
expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 40\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n"
for: 1h
labels:
severity: warning
@ -188,13 +188,13 @@ spec:
severity: critical
- name: node-exporter.rules
rules:
- expr: "count without (cpu, mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}\n)\n"
- expr: "count without (cpu) (\n count without (mode) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n )\n)\n"
record: instance:node_num_cpu:sum
- expr: "1 - avg without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n)\n"
- expr: "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[5m])\n)\n"
record: instance:node_cpu_utilisation:rate5m
- expr: "(\n node_load1{job=\"node-exporter\"}\n/\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n"
record: instance:node_load1_per_cpu:ratio
- expr: "1 - (\n (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n"
- expr: "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n"
record: instance:node_memory_utilisation:ratio
- expr: 'rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])

View File

@ -86,19 +86,4 @@ spec:
for: 5m
labels:
severity: warning
- name: config-reloaders
rules:
- alert: ConfigReloaderSidecarErrors
annotations:
description: 'Errors encountered while the {{`{{`}}$labels.pod{{`}}`}} config-reloader sidecar attempts to sync config in {{`{{`}}$labels.namespace{{`}}`}} namespace.
As a result, configuration for service running in {{`{{`}}$labels.pod{{`}}`}} may be stale and cannot be updated anymore.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors
summary: config-reloader sidecar has not had a successful reload for 10m
expr: 'max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
'
for: 10m
labels:
severity: warning

View File

@ -1,8 +1,4 @@
kube-prometheus-stack:
global:
rbac:
pspEnabled: false
defaultRules:
create: false
@ -75,7 +71,10 @@ kube-prometheus-stack:
nodeExporter:
enabled: true
serviceMonitor:
prometheus-node-exporter:
prometheus:
monitor:
relabelings:
- sourceLabels: [__meta_kubernetes_pod_node_name]
separator: ;
@ -83,10 +82,6 @@ kube-prometheus-stack:
targetLabel: node
replacement: $1
action: replace
prometheus-node-exporter:
rbac:
pspEnabled: false
resources:
requests:
cpu: 20m
@ -127,8 +122,6 @@ kube-prometheus-stack:
# Custom Grafana tweaks
grafana:
enabled: true
rbac:
pspEnabled: false
# Disabled as we use the upstream kube-mixin dashboards directly
defaultDashboardsEnabled: false
@ -190,8 +183,6 @@ kube-prometheus-stack:
# Assign state metrics to control plane
kube-state-metrics:
podSecurityPolicy:
enabled: false
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
@ -209,6 +200,34 @@ kube-prometheus-stack:
group_wait: 30s
group_interval: 5m
repeat_interval: 6h
routes:
- matchers:
- alertname = Watchdog
receiver: 'null'
- matchers:
- alertname = InfoInhibitor
receiver: 'null'
inhibit_rules:
- equal:
- namespace
- alertname
source_matchers:
- severity = critical
target_matchers:
- severity =~ warning|info
- equal:
- namespace
- alertname
source_matchers:
- severity = warning
target_matchers:
- severity = info
- equal:
- namespace
source_matchers:
- alertname = InfoInhibitor
target_matchers:
- severity = info
alertmanagerSpec:
# externalUrl:
logFormat: json