feat: another round of upgrade to 1.23.11

This commit is contained in:
Stefan Reimer 2022-09-16 11:21:38 +02:00
parent 1f7971220e
commit c2b929f952
97 changed files with 2458 additions and 862 deletions

View File

@ -133,6 +133,9 @@ control_plane_upgrade kubeadm_upgrade
kubectl delete ds kube-multus-ds -n kube-system kubectl delete ds kube-multus-ds -n kube-system
# Required due to chart upgrade to 4.X part of prometheus-stack 40.X
kubectl delete daemonset metrics-prometheus-node-exporter -n monitoring
control_plane_upgrade "apply_network, apply_addons" control_plane_upgrade "apply_network, apply_addons"
kubectl rollout restart daemonset/calico-node -n kube-system kubectl rollout restart daemonset/calico-node -n kube-system

View File

@ -39,4 +39,4 @@ Kubernetes: `>= 1.18.0`
| service.port | int | `3310` | The port to be used by the clamav service | | service.port | int | `3310` | The port to be used by the clamav service |
---------------------------------------------- ----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.9.1](https://github.com/norwoodj/helm-docs/releases/v1.9.1) Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubeadm name: kubeadm
description: KubeZero Kubeadm cluster config description: KubeZero Kubeadm cluster config
type: application type: application
version: 1.23.10 version: 1.23.11
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords: keywords:

View File

@ -3,7 +3,7 @@ name: kubezero-addons
description: KubeZero umbrella chart for various optional cluster addons description: KubeZero umbrella chart for various optional cluster addons
type: application type: application
version: 0.6.2 version: 0.6.2
appVersion: v1.23.10 appVersion: v1.23.11
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords: keywords:

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-metrics name: kubezero-metrics
description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations. description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
type: application type: application
version: 0.8.1 version: 0.8.4
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords: keywords:
@ -18,7 +18,7 @@ dependencies:
version: ">= 0.1.5" version: ">= 0.1.5"
repository: https://cdn.zero-downtime.net/charts/ repository: https://cdn.zero-downtime.net/charts/
- name: kube-prometheus-stack - name: kube-prometheus-stack
version: 39.9.0 version: 40.0.0
# Switch back to upstream once all alerts are fixed eg. etcd gpcr # Switch back to upstream once all alerts are fixed eg. etcd gpcr
# repository: https://prometheus-community.github.io/helm-charts # repository: https://prometheus-community.github.io/helm-charts
- name: prometheus-adapter - name: prometheus-adapter

View File

@ -6,20 +6,20 @@ annotations:
url: https://github.com/prometheus-operator/kube-prometheus url: https://github.com/prometheus-operator/kube-prometheus
artifacthub.io/operator: "true" artifacthub.io/operator: "true"
apiVersion: v2 apiVersion: v2
appVersion: 0.58.0 appVersion: 0.59.1
dependencies: dependencies:
- condition: kubeStateMetrics.enabled - condition: kubeStateMetrics.enabled
name: kube-state-metrics name: kube-state-metrics
repository: https://prometheus-community.github.io/helm-charts repository: https://prometheus-community.github.io/helm-charts
version: 4.15.* version: 4.18.*
- condition: nodeExporter.enabled - condition: nodeExporter.enabled
name: prometheus-node-exporter name: prometheus-node-exporter
repository: https://prometheus-community.github.io/helm-charts repository: https://prometheus-community.github.io/helm-charts
version: 3.3.* version: 4.2.*
- condition: grafana.enabled - condition: grafana.enabled
name: grafana name: grafana
repository: https://grafana.github.io/helm-charts repository: https://grafana.github.io/helm-charts
version: 6.32.* version: 6.38.*
description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards,
and Prometheus rules combined with documentation and scripts to provide easy to and Prometheus rules combined with documentation and scripts to provide easy to
operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus
@ -51,4 +51,4 @@ sources:
- https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus - https://github.com/prometheus-operator/kube-prometheus
type: application type: application
version: 39.9.0 version: 40.0.0

View File

@ -80,6 +80,33 @@ _See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documen
A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions. A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions.
### From 39.x to 40.x
This version upgrades Prometheus-Operator to v0.59.1, Prometheus to v2.38.0, kube-state-metrics to v2.6.0 and Thanos to v0.28.0.
This version also upgrades the Helm charts of kube-state-metrics to 4.18.0 and prometheus-node-exporter to 4.2.0.
Run these commands to update the CRDs before applying the upgrade.
```console
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
```
Starting from prometheus-node-exporter version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade.
```console
kubectl delete daemonset -l app=prometheus-node-exporter
helm upgrade -i kube-prometheus-stack prometheus-community/kube-prometheus-stack
```
If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please ensure to upgrade their `selector` fields accordingly to the new labels.
### From 38.x to 39.x ### From 38.x to 39.x
This upgraded prometheus-operator to v0.58.0 and prometheus to v2.37.0 This upgraded prometheus-operator to v0.58.0 and prometheus to v2.37.0

View File

@ -1,5 +1,5 @@
apiVersion: v2 apiVersion: v2
appVersion: 9.0.5 appVersion: 9.1.4
description: The leading tool for querying and visualizing time series and metrics. description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net home: https://grafana.net
icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png
@ -19,4 +19,4 @@ name: grafana
sources: sources:
- https://github.com/grafana/grafana - https://github.com/grafana/grafana
type: application type: application
version: 6.32.10 version: 6.38.0

View File

@ -67,6 +67,7 @@ This version requires Helm >= 3.1.0.
| `service.type` | Kubernetes service type | `ClusterIP` | | `service.type` | Kubernetes service type | `ClusterIP` |
| `service.port` | Kubernetes port where service is exposed | `80` | | `service.port` | Kubernetes port where service is exposed | `80` |
| `service.portName` | Name of the port on the service | `service` | | `service.portName` | Name of the port on the service | `service` |
| `service.appProtocol` | Adds the appProtocol field to the service | `` |
| `service.targetPort` | Internal service is port | `3000` | | `service.targetPort` | Internal service is port | `3000` |
| `service.nodePort` | Kubernetes service nodePort | `nil` | | `service.nodePort` | Kubernetes service nodePort | `nil` |
| `service.annotations` | Service annotations (can be templated) | `{}` | | `service.annotations` | Service annotations (can be templated) | `{}` |
@ -127,6 +128,7 @@ This version requires Helm >= 3.1.0.
| `extraEmptyDirMounts` | Additional grafana server emptyDir volume mounts | `[]` | | `extraEmptyDirMounts` | Additional grafana server emptyDir volume mounts | `[]` |
| `plugins` | Plugins to be loaded along with Grafana | `[]` | | `plugins` | Plugins to be loaded along with Grafana | `[]` |
| `datasources` | Configure grafana datasources (passed through tpl) | `{}` | | `datasources` | Configure grafana datasources (passed through tpl) | `{}` |
| `alerting` | Configure grafana alerting (passed through tpl) | `{}` |
| `notifiers` | Configure grafana notifiers | `{}` | | `notifiers` | Configure grafana notifiers | `{}` |
| `dashboardProviders` | Configure grafana dashboard providers | `{}` | | `dashboardProviders` | Configure grafana dashboard providers | `{}` |
| `dashboards` | Dashboards to import | `{}` | | `dashboards` | Dashboards to import | `{}` |
@ -233,13 +235,14 @@ This version requires Helm >= 3.1.0.
| `imageRenderer.priorityClassName` | image-renderer deployment priority class | `''` | | `imageRenderer.priorityClassName` | image-renderer deployment priority class | `''` |
| `imageRenderer.service.enabled` | Enable the image-renderer service | `true` | | `imageRenderer.service.enabled` | Enable the image-renderer service | `true` |
| `imageRenderer.service.portName` | image-renderer service port name | `http` | | `imageRenderer.service.portName` | image-renderer service port name | `http` |
| `imageRenderer.service.port` | image-renderer service port used by both service and deployment | `8081` | | `imageRenderer.service.port` | image-renderer port used by deployment | `8081` |
| `imageRenderer.grafanaProtocol` | Protocol to use for image renderer callback url | `http` | | `imageRenderer.service.targetPort` | image-renderer service port used by service | `8081` |
| `imageRenderer.appProtocol` | Adds the appProtocol field to the service | `` |
| `imageRenderer.grafanaSubPath` | Grafana sub path to use for image renderer callback url | `''` | | `imageRenderer.grafanaSubPath` | Grafana sub path to use for image renderer callback url | `''` |
| `imageRenderer.podPortName` | name of the image-renderer port on the pod | `http` | | `imageRenderer.podPortName` | name of the image-renderer port on the pod | `http` |
| `imageRenderer.revisionHistoryLimit` | number of image-renderer replica sets to keep | `10` | | `imageRenderer.revisionHistoryLimit` | number of image-renderer replica sets to keep | `10` |
| `imageRenderer.networkPolicy.limitIngress` | Enable a NetworkPolicy to limit inbound traffic from only the created grafana pods | `true` | | `imageRenderer.networkPolicy.limitIngress` | Enable a NetworkPolicy to limit inbound traffic from only the created grafana pods | `true` |
| `imageRenderer.networkPolicy.limitEgress` | Enable a NetworkPolicy to limit outbound traffic to only the created grafana pods | `false` | | `imageRenderer.networkPolicy.limitEgress` | Enable a NetworkPolicy to limit outbound traffic to only the created grafana pods | `false` |
| `imageRenderer.resources` | Set resource limits for image-renderer pdos | `{}` | | `imageRenderer.resources` | Set resource limits for image-renderer pdos | `{}` |
| `imageRenderer.nodeSelector` | Node labels for pod assignment | `{}` | | `imageRenderer.nodeSelector` | Node labels for pod assignment | `{}` |
| `imageRenderer.tolerations` | Toleration labels for pod assignment | `[]` | | `imageRenderer.tolerations` | Toleration labels for pod assignment | `[]` |
@ -273,7 +276,7 @@ ingress:
### Example of extraVolumeMounts ### Example of extraVolumeMounts
Volume can be type persistentVolumeClaim or hostPath but not both at same time. Volume can be type persistentVolumeClaim or hostPath but not both at same time.
If none existingClaim or hostPath argument is givent then type is emptyDir. If neither existingClaim or hostPath argument is given then type is emptyDir.
```yaml ```yaml
- extraVolumeMounts: - extraVolumeMounts:
@ -482,7 +485,7 @@ grafana.ini:
## How to securely reference secrets in grafana.ini ## How to securely reference secrets in grafana.ini
This example uses Grafana uses [file providers](https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider) for secret values and the `extraSecretMounts` configuration flag (Additional grafana server secret mounts) to mount the secrets. This example uses Grafana [file providers](https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider) for secret values and the `extraSecretMounts` configuration flag (Additional grafana server secret mounts) to mount the secrets.
In grafana.ini: In grafana.ini:

View File

@ -141,22 +141,11 @@ Return the appropriate apiVersion for ingress.
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
{{/*
Return the appropriate apiVersion for podSecurityPolicy.
*/}}
{{- define "grafana.podSecurityPolicy.apiVersion" -}}
{{- if and (.Capabilities.APIVersions.Has "policy/v1beta1") (semverCompare ">= 1.16-0" .Capabilities.KubeVersion.Version) -}}
{{- print "policy/v1beta1" -}}
{{- else -}}
{{- print "extensions/v1beta1" -}}
{{- end -}}
{{- end -}}
{{/* {{/*
Return the appropriate apiVersion for podDisruptionBudget. Return the appropriate apiVersion for podDisruptionBudget.
*/}} */}}
{{- define "grafana.podDisruptionBudget.apiVersion" -}} {{- define "grafana.podDisruptionBudget.apiVersion" -}}
{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.21-0" .Capabilities.KubeVersion.Version) -}} {{- if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" -}}
{{- print "policy/v1" -}} {{- print "policy/v1" -}}
{{- else -}} {{- else -}}
{{- print "policy/v1beta1" -}} {{- print "policy/v1beta1" -}}

View File

@ -1,17 +1,16 @@
{{- define "grafana.pod" -}} {{- define "grafana.pod" -}}
{{- if .Values.schedulerName }} {{- if .Values.schedulerName }}
schedulerName: "{{ .Values.schedulerName }}" schedulerName: "{{ .Values.schedulerName }}"
{{- end }} {{- end }}
serviceAccountName: {{ template "grafana.serviceAccountName" . }} serviceAccountName: {{ template "grafana.serviceAccountName" . }}
automountServiceAccountToken: {{ .Values.serviceAccount.autoMount }} automountServiceAccountToken: {{ .Values.serviceAccount.autoMount }}
{{- if .Values.securityContext }} {{- with .Values.securityContext }}
securityContext: securityContext:
{{ toYaml .Values.securityContext | indent 2 }} {{- toYaml . | nindent 2 }}
{{- end }} {{- end }}
{{- if .Values.hostAliases }} {{- with .Values.hostAliases }}
hostAliases: hostAliases:
{{ toYaml .Values.hostAliases | indent 2 }} {{- toYaml . | nindent 2 }}
{{- end }} {{- end }}
{{- if .Values.priorityClassName }} {{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }} priorityClassName: {{ .Values.priorityClassName }}
@ -31,8 +30,10 @@ initContainers:
runAsNonRoot: false runAsNonRoot: false
runAsUser: 0 runAsUser: 0
command: ["chown", "-R", "{{ .Values.securityContext.runAsUser }}:{{ .Values.securityContext.runAsGroup }}", "/var/lib/grafana"] command: ["chown", "-R", "{{ .Values.securityContext.runAsUser }}:{{ .Values.securityContext.runAsGroup }}", "/var/lib/grafana"]
{{- with .Values.initChownData.resources }}
resources: resources:
{{ toYaml .Values.initChownData.resources | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts: volumeMounts:
- name: storage - name: storage
mountPath: "/var/lib/grafana" mountPath: "/var/lib/grafana"
@ -50,13 +51,19 @@ initContainers:
imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }} imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }}
command: ["/bin/sh"] command: ["/bin/sh"]
args: [ "-c", "mkdir -p /var/lib/grafana/dashboards/default && /bin/sh -x /etc/grafana/download_dashboards.sh" ] args: [ "-c", "mkdir -p /var/lib/grafana/dashboards/default && /bin/sh -x /etc/grafana/download_dashboards.sh" ]
{{- with .Values.downloadDashboards.resources }}
resources: resources:
{{ toYaml .Values.downloadDashboards.resources | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }}
env: env:
{{- range $key, $value := .Values.downloadDashboards.env }} {{- range $key, $value := .Values.downloadDashboards.env }}
- name: "{{ $key }}" - name: "{{ $key }}"
value: "{{ $value }}" value: "{{ $value }}"
{{- end }} {{- end }}
{{- with .Values.downloadDashboards.securityContext }}
securityContext:
{{- toYaml . | nindent 6 }}
{{- end }}
{{- if .Values.downloadDashboards.envFromSecret }} {{- if .Values.downloadDashboards.envFromSecret }}
envFrom: envFrom:
- secretRef: - secretRef:
@ -86,6 +93,14 @@ initContainers:
{{- end }} {{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env: env:
{{- range $key, $value := .Values.sidecar.datasources.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.datasources.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD - name: METHOD
value: "LIST" value: "LIST"
- name: LABEL - name: LABEL
@ -94,6 +109,10 @@ initContainers:
- name: LABEL_VALUE - name: LABEL_VALUE
value: {{ quote .Values.sidecar.datasources.labelValue }} value: {{ quote .Values.sidecar.datasources.labelValue }}
{{- end }} {{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }}
{{- end }}
- name: FOLDER - name: FOLDER
value: "/etc/grafana/provisioning/datasources" value: "/etc/grafana/provisioning/datasources"
- name: RESOURCE - name: RESOURCE
@ -110,12 +129,14 @@ initContainers:
- name: SKIP_TLS_VERIFY - name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}" value: "{{ .Values.sidecar.skipTlsVerify }}"
{{- end }} {{- end }}
{{- with .Values.sidecar.resources }}
resources: resources:
{{ toYaml .Values.sidecar.resources | indent 6 }} {{- toYaml . | nindent 6 }}
{{- if .Values.sidecar.securityContext }} {{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext: securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
volumeMounts: volumeMounts:
- name: sc-datasources-volume - name: sc-datasources-volume
mountPath: "/etc/grafana/provisioning/datasources" mountPath: "/etc/grafana/provisioning/datasources"
@ -129,10 +150,26 @@ initContainers:
{{- end }} {{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env: env:
{{- range $key, $value := .Values.sidecar.notifiers.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.notifiers.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD - name: METHOD
value: LIST value: LIST
- name: LABEL - name: LABEL
value: "{{ .Values.sidecar.notifiers.label }}" value: "{{ .Values.sidecar.notifiers.label }}"
{{- if .Values.sidecar.notifiers.labelValue }}
- name: LABEL_VALUE
value: {{ quote .Values.sidecar.notifiers.labelValue }}
{{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.notifiers.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.notifiers.logLevel }}
{{- end }}
- name: FOLDER - name: FOLDER
value: "/etc/grafana/provisioning/notifiers" value: "/etc/grafana/provisioning/notifiers"
- name: RESOURCE - name: RESOURCE
@ -149,20 +186,22 @@ initContainers:
- name: SKIP_TLS_VERIFY - name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}" value: "{{ .Values.sidecar.skipTlsVerify }}"
{{- end }} {{- end }}
{{- if .Values.sidecar.livenessProbe }} {{- with .Values.sidecar.livenessProbe }}
livenessProbe: livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
{{- if .Values.sidecar.readinessProbe }} {{- with .Values.sidecar.readinessProbe }}
readinessProbe: readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
{{- with .Values.sidecar.resources }}
resources: resources:
{{ toYaml .Values.sidecar.resources | indent 6 }} {{- toYaml . | nindent 6 }}
{{- if .Values.sidecar.securityContext }} {{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext: securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
volumeMounts: volumeMounts:
- name: sc-notifiers-volume - name: sc-notifiers-volume
mountPath: "/etc/grafana/provisioning/notifiers" mountPath: "/etc/grafana/provisioning/notifiers"
@ -190,6 +229,14 @@ containers:
{{- end }} {{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env: env:
{{- range $key, $value := .Values.sidecar.dashboards.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.dashboards.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD - name: METHOD
value: {{ .Values.sidecar.dashboards.watchMethod }} value: {{ .Values.sidecar.dashboards.watchMethod }}
- name: LABEL - name: LABEL
@ -198,9 +245,9 @@ containers:
- name: LABEL_VALUE - name: LABEL_VALUE
value: {{ quote .Values.sidecar.dashboards.labelValue }} value: {{ quote .Values.sidecar.dashboards.labelValue }}
{{- end }} {{- end }}
{{- if .Values.sidecar.logLevel }} {{- if or .Values.sidecar.logLevel .Values.sidecar.dashboards.logLevel }}
- name: LOG_LEVEL - name: LOG_LEVEL
value: {{ quote .Values.sidecar.logLevel }} value: {{ default .Values.sidecar.logLevel .Values.sidecar.dashboards.logLevel }}
{{- end }} {{- end }}
- name: FOLDER - name: FOLDER
value: "{{ .Values.sidecar.dashboards.folder }}{{- with .Values.sidecar.dashboards.defaultFolderName }}/{{ . }}{{- end }}" value: "{{ .Values.sidecar.dashboards.folder }}{{- with .Values.sidecar.dashboards.defaultFolderName }}/{{ . }}{{- end }}"
@ -227,27 +274,35 @@ containers:
value: "{{ .Values.sidecar.dashboards.script }}" value: "{{ .Values.sidecar.dashboards.script }}"
{{- end }} {{- end }}
{{- if .Values.sidecar.dashboards.watchServerTimeout }} {{- if .Values.sidecar.dashboards.watchServerTimeout }}
{{- if ne .Values.sidecar.dashboards.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.dashboards.watchServerTimeout with .Values.sidecar.dashboards.watchMethod %s" .Values.sidecar.dashboards.watchMethod) }}
{{- end }}
- name: WATCH_SERVER_TIMEOUT - name: WATCH_SERVER_TIMEOUT
value: "{{ .Values.sidecar.dashboards.watchServerTimeout }}" value: "{{ .Values.sidecar.dashboards.watchServerTimeout }}"
{{- end }} {{- end }}
{{- if .Values.sidecar.dashboards.watchClientTimeout }} {{- if .Values.sidecar.dashboards.watchClientTimeout }}
{{- if ne .Values.sidecar.dashboards.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.dashboards.watchClientTimeout with .Values.sidecar.dashboards.watchMethod %s" .Values.sidecar.dashboards.watchMethod) }}
{{- end }}
- name: WATCH_CLIENT_TIMEOUT - name: WATCH_CLIENT_TIMEOUT
value: "{{ .Values.sidecar.dashboards.watchClientTimeout }}" value: "{{ .Values.sidecar.dashboards.watchClientTimeout }}"
{{- end }} {{- end }}
{{- if .Values.sidecar.livenessProbe }} {{- with .Values.sidecar.livenessProbe }}
livenessProbe: livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
{{- if .Values.sidecar.readinessProbe }} {{- with .Values.sidecar.readinessProbe }}
readinessProbe: readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
{{- with .Values.sidecar.resources }}
resources: resources:
{{ toYaml .Values.sidecar.resources | indent 6 }} {{- toYaml . | nindent 6 }}
{{- if .Values.sidecar.securityContext }} {{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext: securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
volumeMounts: volumeMounts:
- name: sc-dashboard-volume - name: sc-dashboard-volume
mountPath: {{ .Values.sidecar.dashboards.folder | quote }} mountPath: {{ .Values.sidecar.dashboards.folder | quote }}
@ -264,6 +319,14 @@ containers:
{{- end }} {{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env: env:
{{- range $key, $value := .Values.sidecar.datasources.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.datasources.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD - name: METHOD
value: {{ .Values.sidecar.datasources.watchMethod }} value: {{ .Values.sidecar.datasources.watchMethod }}
- name: LABEL - name: LABEL
@ -272,6 +335,10 @@ containers:
- name: LABEL_VALUE - name: LABEL_VALUE
value: {{ quote .Values.sidecar.datasources.labelValue }} value: {{ quote .Values.sidecar.datasources.labelValue }}
{{- end }} {{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }}
{{- end }}
- name: FOLDER - name: FOLDER
value: "/etc/grafana/provisioning/datasources" value: "/etc/grafana/provisioning/datasources"
- name: RESOURCE - name: RESOURCE
@ -288,6 +355,10 @@ containers:
- name: SKIP_TLS_VERIFY - name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}" value: "{{ .Values.sidecar.skipTlsVerify }}"
{{- end }} {{- end }}
{{- if .Values.sidecar.datasources.script }}
- name: SCRIPT
value: "{{ .Values.sidecar.datasources.script }}"
{{- end }}
{{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} {{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }}
- name: REQ_USERNAME - name: REQ_USERNAME
valueFrom: valueFrom:
@ -308,20 +379,36 @@ containers:
- name: REQ_METHOD - name: REQ_METHOD
value: POST value: POST
{{- end }} {{- end }}
{{- if .Values.sidecar.livenessProbe }} {{- if .Values.sidecar.datasources.watchServerTimeout }}
{{- if ne .Values.sidecar.datasources.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.datasources.watchServerTimeout with .Values.sidecar.datasources.watchMethod %s" .Values.sidecar.datasources.watchMethod) }}
{{- end }}
- name: WATCH_SERVER_TIMEOUT
value: "{{ .Values.sidecar.datasources.watchServerTimeout }}"
{{- end }}
{{- if .Values.sidecar.datasources.watchClientTimeout }}
{{- if ne .Values.sidecar.datasources.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.datasources.watchClientTimeout with .Values.sidecar.datasources.watchMethod %s" .Values.sidecar.datasources.watchMethod) }}
{{- end }}
- name: WATCH_CLIENT_TIMEOUT
value: "{{ .Values.sidecar.datasources.watchClientTimeout }}"
{{- end }}
{{- with .Values.sidecar.livenessProbe }}
livenessProbe: livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
{{- if .Values.sidecar.readinessProbe }} {{- with .Values.sidecar.readinessProbe }}
readinessProbe: readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
{{- with .Values.sidecar.resources }}
resources: resources:
{{ toYaml .Values.sidecar.resources | indent 6 }} {{- toYaml . | nindent 6 }}
{{- if .Values.sidecar.securityContext }} {{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext: securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
volumeMounts: volumeMounts:
- name: sc-datasources-volume - name: sc-datasources-volume
mountPath: "/etc/grafana/provisioning/datasources" mountPath: "/etc/grafana/provisioning/datasources"
@ -335,6 +422,14 @@ containers:
{{- end }} {{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env: env:
{{- range $key, $value := .Values.sidecar.plugins.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.plugins.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD - name: METHOD
value: {{ .Values.sidecar.plugins.watchMethod }} value: {{ .Values.sidecar.plugins.watchMethod }}
- name: LABEL - name: LABEL
@ -343,6 +438,10 @@ containers:
- name: LABEL_VALUE - name: LABEL_VALUE
value: {{ quote .Values.sidecar.plugins.labelValue }} value: {{ quote .Values.sidecar.plugins.labelValue }}
{{- end }} {{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.plugins.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.plugins.logLevel }}
{{- end }}
- name: FOLDER - name: FOLDER
value: "/etc/grafana/provisioning/plugins" value: "/etc/grafana/provisioning/plugins"
- name: RESOURCE - name: RESOURCE
@ -355,6 +454,10 @@ containers:
- name: NAMESPACE - name: NAMESPACE
value: "{{ .Values.sidecar.plugins.searchNamespace | join "," }}" value: "{{ .Values.sidecar.plugins.searchNamespace | join "," }}"
{{- end }} {{- end }}
{{- if .Values.sidecar.plugins.script }}
- name: SCRIPT
value: "{{ .Values.sidecar.plugins.script }}"
{{- end }}
{{- if .Values.sidecar.skipTlsVerify }} {{- if .Values.sidecar.skipTlsVerify }}
- name: SKIP_TLS_VERIFY - name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}" value: "{{ .Values.sidecar.skipTlsVerify }}"
@ -379,20 +482,36 @@ containers:
- name: REQ_METHOD - name: REQ_METHOD
value: POST value: POST
{{- end }} {{- end }}
{{- if .Values.sidecar.livenessProbe }} {{- if .Values.sidecar.plugins.watchServerTimeout }}
{{- if ne .Values.sidecar.plugins.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.plugins.watchServerTimeout with .Values.sidecar.plugins.watchMethod %s" .Values.sidecar.plugins.watchMethod) }}
{{- end }}
- name: WATCH_SERVER_TIMEOUT
value: "{{ .Values.sidecar.plugins.watchServerTimeout }}"
{{- end }}
{{- if .Values.sidecar.plugins.watchClientTimeout }}
{{- if ne .Values.sidecar.plugins.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.plugins.watchClientTimeout with .Values.sidecar.plugins.watchMethod %s" .Values.sidecar.plugins.watchMethod) }}
{{- end }}
- name: WATCH_CLIENT_TIMEOUT
value: "{{ .Values.sidecar.plugins.watchClientTimeout }}"
{{- end }}
{{- with .Values.sidecar.livenessProbe }}
livenessProbe: livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
{{- if .Values.sidecar.readinessProbe }} {{- with .Values.sidecar.readinessProbe }}
readinessProbe: readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
{{- with .Values.sidecar.resources }}
resources: resources:
{{ toYaml .Values.sidecar.resources | indent 6 }} {{- toYaml . | nindent 6 }}
{{- if .Values.sidecar.securityContext }} {{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext: securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
volumeMounts: volumeMounts:
- name: sc-plugins-volume - name: sc-plugins-volume
mountPath: "/etc/grafana/provisioning/plugins" mountPath: "/etc/grafana/provisioning/plugins"
@ -410,10 +529,10 @@ containers:
- {{ . }} - {{ . }}
{{- end }} {{- end }}
{{- end}} {{- end}}
{{- if .Values.containerSecurityContext }} {{- with .Values.containerSecurityContext }}
securityContext: securityContext:
{{- toYaml .Values.containerSecurityContext | nindent 6 }} {{- toYaml . | nindent 6 }}
{{- end }} {{- end }}
volumeMounts: volumeMounts:
- name: config - name: config
mountPath: "/etc/grafana/grafana.ini" mountPath: "/etc/grafana/grafana.ini"
@ -466,6 +585,13 @@ containers:
subPath: {{ . | quote }} subPath: {{ . | quote }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if .Values.alerting }}
{{- range (keys .Values.alerting | sortAlpha) }}
- name: config
mountPath: "/etc/grafana/provisioning/alerting/{{ . }}"
subPath: {{ . | quote }}
{{- end }}
{{- end }}
{{- if .Values.dashboardProviders }} {{- if .Values.dashboardProviders }}
{{- range (keys .Values.dashboardProviders | sortAlpha) }} {{- range (keys .Values.dashboardProviders | sortAlpha) }}
- name: config - name: config
@ -511,11 +637,8 @@ containers:
mountPath: {{ .mountPath }} mountPath: {{ .mountPath }}
{{- end }} {{- end }}
ports: ports:
- name: {{ .Values.service.portName }}
containerPort: {{ .Values.service.port }}
protocol: TCP
- name: {{ .Values.podPortName }} - name: {{ .Values.podPortName }}
containerPort: 3000 containerPort: {{ .Values.service.targetPort }}
protocol: TCP protocol: TCP
env: env:
{{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} {{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }}
@ -595,30 +718,40 @@ containers:
optional: {{ .optional | default false }} optional: {{ .optional | default false }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- with .Values.livenessProbe }}
livenessProbe: livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.readinessProbe }}
readinessProbe: readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }}
{{- if .Values.lifecycleHooks }} {{- if .Values.lifecycleHooks }}
lifecycle: {{ tpl (.Values.lifecycleHooks | toYaml) . | nindent 6 }} lifecycle: {{ tpl (.Values.lifecycleHooks | toYaml) . | nindent 6 }}
{{- end }} {{- end }}
{{- with .Values.resources }}
resources: resources:
{{ toYaml .Values.resources | indent 6 }} {{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.extraContainers }} {{- with .Values.extraContainers }}
{{ tpl . $ | indent 2 }} {{ tpl . $ | indent 2 }}
{{- end }} {{- end }}
{{- with .Values.nodeSelector }} {{- with .Values.nodeSelector }}
nodeSelector: nodeSelector:
{{ toYaml . | indent 2 }} {{- toYaml . | nindent 2 }}
{{- end }} {{- end }}
{{- $root := . }} {{- $root := . }}
{{- with .Values.affinity }} {{- with .Values.affinity }}
affinity: affinity:
{{ tpl (toYaml .) $root | indent 2 }} {{ tpl (toYaml .) $root | indent 2 }}
{{- end }} {{- end }}
{{- with .Values.topologySpreadConstraints }}
topologySpreadConstraints:
{{- toYaml . | nindent 2 }}
{{- end }}
{{- with .Values.tolerations }} {{- with .Values.tolerations }}
tolerations: tolerations:
{{ toYaml . | indent 2 }} {{- toYaml . | nindent 2 }}
{{- end }} {{- end }}
volumes: volumes:
- name: config - name: config
@ -744,6 +877,10 @@ volumes:
{{- else if .hostPath }} {{- else if .hostPath }}
hostPath: hostPath:
path: {{ .hostPath }} path: {{ .hostPath }}
{{- else if .csi }}
csi:
data:
{{ toYaml .data | nindent 6 }}
{{- else }} {{- else }}
emptyDir: {} emptyDir: {}
{{- end }} {{- end }}
@ -753,6 +890,6 @@ volumes:
emptyDir: {} emptyDir: {}
{{- end -}} {{- end -}}
{{- if .Values.extraContainerVolumes }} {{- if .Values.extraContainerVolumes }}
{{ toYaml .Values.extraContainerVolumes | indent 2 }} {{ tpl (toYaml .Values.extraContainerVolumes) . | indent 2 }}
{{- end }} {{- end }}
{{- end }} {{- end }}

View File

@ -56,6 +56,14 @@ data:
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
{{- if .Values.alerting }}
{{ $root := . }}
{{- range $key, $value := .Values.alerting }}
{{ $key }}: |
{{ tpl $value $root | indent 4 }}
{{- end -}}
{{- end -}}
{{- if .Values.dashboardProviders }} {{- if .Values.dashboardProviders }}
{{- range $key, $value := .Values.dashboardProviders }} {{- range $key, $value := .Values.dashboardProviders }}
{{ $key }}: | {{ $key }}: |
@ -86,6 +94,12 @@ data:
{{- if $value.token }} {{- if $value.token }}
-H "Authorization: token {{ $value.token }}" \ -H "Authorization: token {{ $value.token }}" \
{{- end }} {{- end }}
{{- if $value.bearerToken }}
-H "Authorization: Bearer {{ $value.bearerToken }}" \
{{- end }}
{{- if $value.gitlabToken }}
-H "PRIVATE-TOKEN: {{ $value.gitlabToken }}" \
{{- end }}
-H "Content-Type: application/json;charset=UTF-8" \ -H "Content-Type: application/json;charset=UTF-8" \
{{ end }} {{ end }}
{{- $dpPath := "" -}} {{- $dpPath := "" -}}

View File

@ -46,5 +46,5 @@ spec:
{{ toYaml . | indent 8 }} {{ toYaml . | indent 8 }}
{{- end }} {{- end }}
spec: spec:
{{- include "grafana.pod" . | nindent 6 }} {{- include "grafana.pod" . | indent 6 }}
{{- end }} {{- end }}

View File

@ -18,5 +18,5 @@ spec:
ports: ports:
- protocol: TCP - protocol: TCP
port: 3000 port: 3000
targetPort: 3000 targetPort: {{ .Values.service.targetPort }}
{{- end }} {{- end }}

View File

@ -77,7 +77,7 @@ spec:
{{- end}} {{- end}}
ports: ports:
- name: {{ .Values.imageRenderer.service.portName }} - name: {{ .Values.imageRenderer.service.portName }}
containerPort: {{ .Values.imageRenderer.service.port }} containerPort: {{ .Values.imageRenderer.service.targetPort }}
protocol: TCP protocol: TCP
livenessProbe: livenessProbe:
httpGet: httpGet:
@ -85,7 +85,7 @@ spec:
port: {{ .Values.imageRenderer.service.portName }} port: {{ .Values.imageRenderer.service.portName }}
env: env:
- name: HTTP_PORT - name: HTTP_PORT
value: {{ .Values.imageRenderer.service.port | quote }} value: {{ .Values.imageRenderer.service.targetPort | quote }}
{{- range $key, $value := .Values.imageRenderer.env }} {{- range $key, $value := .Values.imageRenderer.env }}
- name: {{ $key | quote }} - name: {{ $key | quote }}
value: {{ $value | quote }} value: {{ $value | quote }}

View File

@ -19,7 +19,7 @@ spec:
- Ingress - Ingress
ingress: ingress:
- ports: - ports:
- port: {{ .Values.imageRenderer.service.port }} - port: {{ .Values.imageRenderer.service.targetPort }}
protocol: TCP protocol: TCP
from: from:
- namespaceSelector: - namespaceSelector:

View File

@ -24,6 +24,9 @@ spec:
port: {{ .Values.imageRenderer.service.port }} port: {{ .Values.imageRenderer.service.port }}
protocol: TCP protocol: TCP
targetPort: {{ .Values.imageRenderer.service.targetPort }} targetPort: {{ .Values.imageRenderer.service.targetPort }}
{{- if .Values.imageRenderer.appProtocol }}
appProtocol: {{ .Values.imageRenderer.appProtocol }}
{{- end }}
selector: selector:
{{- include "grafana.imageRenderer.selectorLabels" . | nindent 4 }} {{- include "grafana.imageRenderer.selectorLabels" . | nindent 4 }}
{{ end }} {{ end }}

View File

@ -6,13 +6,13 @@ metadata:
namespace: {{ template "grafana.namespace" . }} namespace: {{ template "grafana.namespace" . }}
labels: labels:
{{- include "grafana.labels" . | nindent 4 }} {{- include "grafana.labels" . | nindent 4 }}
{{- if .Values.labels }} {{- with .Values.labels }}
{{ toYaml .Values.labels | indent 4 }} {{ toYaml . | nindent 4 }}
{{- end }} {{- end }}
{{- with .Values.annotations }} {{- with .Values.annotations }}
annotations: annotations:
{{ toYaml . | indent 4 }} {{- toYaml . | nindent 4 }}
{{- end }} {{- end }}
spec: spec:
policyTypes: policyTypes:
{{- if .Values.networkPolicy.ingress }} {{- if .Values.networkPolicy.ingress }}
@ -39,10 +39,10 @@ spec:
- podSelector: - podSelector:
matchLabels: matchLabels:
{{ template "grafana.fullname" . }}-client: "true" {{ template "grafana.fullname" . }}-client: "true"
{{- if .Values.networkPolicy.explicitNamespacesSelector }} {{- with .Values.networkPolicy.explicitNamespacesSelector }}
namespaceSelector: - namespaceSelector:
{{ toYaml .Values.networkPolicy.explicitNamespacesSelector | indent 12 }} {{- toYaml . | nindent 12 }}
{{- end }} {{- end }}
- podSelector: - podSelector:
matchLabels: matchLabels:
{{- include "grafana.labels" . | nindent 14 }} {{- include "grafana.labels" . | nindent 14 }}

View File

@ -1,5 +1,6 @@
{{- if .Values.rbac.pspEnabled }} {{- if .Values.rbac.pspEnabled }}
apiVersion: {{ include "grafana.podSecurityPolicy.apiVersion" . }} {{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy kind: PodSecurityPolicy
metadata: metadata:
name: {{ template "grafana.fullname" . }} name: {{ template "grafana.fullname" . }}
@ -47,3 +48,4 @@ spec:
max: 65535 max: 65535
readOnlyRootFilesystem: false readOnlyRootFilesystem: false
{{- end }} {{- end }}
{{- end }}

View File

@ -9,9 +9,10 @@ metadata:
{{- if .Values.service.labels }} {{- if .Values.service.labels }}
{{ toYaml .Values.service.labels | indent 4 }} {{ toYaml .Values.service.labels | indent 4 }}
{{- end }} {{- end }}
{{- $root := . }}
{{- with .Values.service.annotations }} {{- with .Values.service.annotations }}
annotations: annotations:
{{ toYaml . | indent 4 }} {{ tpl (toYaml . | indent 4) $root }}
{{- end }} {{- end }}
spec: spec:
{{- if (or (eq .Values.service.type "ClusterIP") (empty .Values.service.type)) }} {{- if (or (eq .Values.service.type "ClusterIP") (empty .Values.service.type)) }}
@ -40,12 +41,15 @@ spec:
port: {{ .Values.service.port }} port: {{ .Values.service.port }}
protocol: TCP protocol: TCP
targetPort: {{ .Values.service.targetPort }} targetPort: {{ .Values.service.targetPort }}
{{ if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort))) }} {{- if .Values.service.appProtocol }}
appProtocol: {{ .Values.service.appProtocol }}
{{- end }}
{{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort))) }}
nodePort: {{.Values.service.nodePort}} nodePort: {{.Values.service.nodePort}}
{{ end }} {{ end }}
{{- if .Values.extraExposePorts }} {{- if .Values.extraExposePorts }}
{{- tpl (toYaml .Values.extraExposePorts) . | indent 4 }} {{- tpl (toYaml .Values.extraExposePorts) . | nindent 4 }}
{{- end }} {{- end }}
selector: selector:
{{- include "grafana.selectorLabels" . | nindent 4 }} {{- include "grafana.selectorLabels" . | nindent 4 }}
{{ end }} {{ end }}

View File

@ -5,7 +5,7 @@ kind: ServiceMonitor
metadata: metadata:
name: {{ template "grafana.fullname" . }} name: {{ template "grafana.fullname" . }}
{{- if .Values.serviceMonitor.namespace }} {{- if .Values.serviceMonitor.namespace }}
namespace: {{ .Values.serviceMonitor.namespace }} namespace: {{ tpl .Values.serviceMonitor.namespace . }}
{{- else }} {{- else }}
namespace: {{ template "grafana.namespace" . }} namespace: {{ template "grafana.namespace" . }}
{{- end }} {{- end }}

View File

@ -11,7 +11,7 @@ data:
@test "Test Health" { @test "Test Health" {
url="http://{{ template "grafana.fullname" . }}/api/health" url="http://{{ template "grafana.fullname" . }}/api/health"
code=$(wget --server-response --spider --timeout 10 --tries 1 ${url} 2>&1 | awk '/^ HTTP/{print $2}') code=$(wget --server-response --spider --timeout 90 --tries 10 ${url} 2>&1 | awk '/^ HTTP/{print $2}')
[ "$code" == "200" ] [ "$code" == "200" ]
} }
{{- end }} {{- end }}

View File

@ -1,4 +1,5 @@
{{- if and .Values.testFramework.enabled .Values.rbac.pspEnabled }} {{- if and .Values.testFramework.enabled .Values.rbac.pspEnabled }}
{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }}
apiVersion: policy/v1beta1 apiVersion: policy/v1beta1
kind: PodSecurityPolicy kind: PodSecurityPolicy
metadata: metadata:
@ -27,3 +28,4 @@ spec:
- csi - csi
- secret - secret
{{- end }} {{- end }}
{{- end }}

View File

@ -136,6 +136,7 @@ downloadDashboards:
env: {} env: {}
envFromSecret: "" envFromSecret: ""
resources: {} resources: {}
securityContext: {}
## Pod Annotations ## Pod Annotations
# podAnnotations: {} # podAnnotations: {}
@ -158,9 +159,12 @@ service:
port: 80 port: 80
targetPort: 3000 targetPort: 3000
# targetPort: 4181 To be used with a proxy extraContainer # targetPort: 4181 To be used with a proxy extraContainer
## Service annotations. Can be templated.
annotations: {} annotations: {}
labels: {} labels: {}
portName: service portName: service
# Adds the appProtocol field to the service. This allows to work with istio protocol selection. Ex: "http" or "tcp"
appProtocol: ""
serviceMonitor: serviceMonitor:
## If true, a ServiceMonitor CRD is created for a prometheus operator ## If true, a ServiceMonitor CRD is created for a prometheus operator
@ -249,6 +253,11 @@ tolerations: []
## ##
affinity: {} affinity: {}
## Topology Spread Constraints
## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/
##
topologySpreadConstraints: []
## Additional init containers (evaluated as template) ## Additional init containers (evaluated as template)
## ref: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ ## ref: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
## ##
@ -468,6 +477,13 @@ extraVolumeMounts: []
# mountPath: /mnt/volume1 # mountPath: /mnt/volume1
# readOnly: true # readOnly: true
# hostPath: /usr/shared/ # hostPath: /usr/shared/
# - name: grafana-secrets
# csi: true
# data:
# driver: secrets-store.csi.k8s.io
# readOnly: true
# volumeAttributes:
# secretProviderClass: "grafana-env-spc"
## Container Lifecycle Hooks. Execute a specific bash command or make an HTTP request ## Container Lifecycle Hooks. Execute a specific bash command or make an HTTP request
lifecycleHooks: {} lifecycleHooks: {}
@ -502,6 +518,71 @@ datasources: {}
# authType: default # authType: default
# defaultRegion: us-east-1 # defaultRegion: us-east-1
## Configure grafana alerting (can be templated)
## ref: http://docs.grafana.org/administration/provisioning/#alerting
##
alerting: {}
# rules.yaml: |
# apiVersion: 1
# groups:
# - orgId: 1
# name: {{ .Chart.Name }}_my_rule_group
# folder: my_first_folder
# interval: 60s
# rules:
# - uid: my_id_1
# title: my_first_rule
# condition: A
# data:
# - refId: A
# datasourceUid: '-100'
# model:
# conditions:
# - evaluator:
# params:
# - 3
# type: gt
# operator:
# type: and
# query:
# params:
# - A
# reducer:
# type: last
# type: query
# datasource:
# type: __expr__
# uid: '-100'
# expression: 1==0
# intervalMs: 1000
# maxDataPoints: 43200
# refId: A
# type: math
# dashboardUid: my_dashboard
# panelId: 123
# noDataState: Alerting
# for: 60s
# annotations:
# some_key: some_value
# labels:
# team: sre_team_1
# contactpoints.yaml: |
# apiVersion: 1
# contactPoints:
# - orgId: 1
# name: cp_1
# receivers:
# - uid: first_uid
# type: pagerduty
# settings:
# integrationKey: XXX
# severity: critical
# class: ping failure
# component: Grafana
# group: app-stack
# summary: |
# {{ `{{ template "default.message" . }}` }}
## Configure notifiers ## Configure notifiers
## ref: http://docs.grafana.org/administration/provisioning/#alert-notification-channels ## ref: http://docs.grafana.org/administration/provisioning/#alert-notification-channels
## ##
@ -562,6 +643,12 @@ dashboards: {}
# url: https://example.com/repository/test-b64.json # url: https://example.com/repository/test-b64.json
# token: '' # token: ''
# b64content: true # b64content: true
# local-dashboard-gitlab:
# url: https://example.com/repository/test-gitlab.json
# gitlabToken: ''
# local-dashboard-bitbucket:
# url: https://example.com/repository/test-bitbucket.json
# bearerToken: ''
## Reference to external ConfigMap per provider. Use provider name as key and ConfigMap name as value. ## Reference to external ConfigMap per provider. Use provider name as key and ConfigMap name as value.
## A provider dashboards must be defined either by external ConfigMaps or in values.yaml, not in both. ## A provider dashboards must be defined either by external ConfigMaps or in values.yaml, not in both.
@ -590,6 +677,8 @@ grafana.ini:
mode: console mode: console
grafana_net: grafana_net:
url: https://grafana.net url: https://grafana.net
server:
domain: "{{ if (and .Values.ingress.enabled .Values.ingress.hosts) }}{{ .Values.ingress.hosts | first }}{{ end }}"
## grafana Authentication can be enabled with the following values on grafana.ini ## grafana Authentication can be enabled with the following values on grafana.ini
# server: # server:
# The full public facing url you use in browser, used for redirects and emails # The full public facing url you use in browser, used for redirects and emails
@ -667,15 +756,21 @@ sidecar:
enableUniqueFilenames: false enableUniqueFilenames: false
readinessProbe: {} readinessProbe: {}
livenessProbe: {} livenessProbe: {}
# Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. # Log level default for all sidecars. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. Defaults to INFO
logLevel: INFO # logLevel: INFO
dashboards: dashboards:
enabled: false enabled: false
# Additional environment variables for the dashboards sidecar
env: {}
# Do not reprocess already processed unchanged resources on k8s API reconnect.
# ignoreAlreadyProcessed: true
SCProvider: true SCProvider: true
# label that the configmaps with dashboards are marked with # label that the configmaps with dashboards are marked with
label: grafana_dashboard label: grafana_dashboard
# value of label that the configmaps with dashboards are set to # value of label that the configmaps with dashboards are set to
labelValue: "" labelValue: ""
# Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
# logLevel: INFO
# folder in the pod that should hold the collected dashboards (unless `defaultFolderName` is set) # folder in the pod that should hold the collected dashboards (unless `defaultFolderName` is set)
folder: /tmp/dashboards folder: /tmp/dashboards
# The default folder name, it will create a subfolder under the `folder` and put dashboards in there instead # The default folder name, it will create a subfolder under the `folder` and put dashboards in there instead
@ -725,10 +820,16 @@ sidecar:
sizeLimit: {} sizeLimit: {}
datasources: datasources:
enabled: false enabled: false
# Additional environment variables for the datasourcessidecar
env: {}
# Do not reprocess already processed unchanged resources on k8s API reconnect.
# ignoreAlreadyProcessed: true
# label that the configmaps with datasources are marked with # label that the configmaps with datasources are marked with
label: grafana_datasource label: grafana_datasource
# value of label that the configmaps with datasources are set to # value of label that the configmaps with datasources are set to
labelValue: "" labelValue: ""
# Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
# logLevel: INFO
# If specified, the sidecar will search for datasource config-maps inside this namespace. # If specified, the sidecar will search for datasource config-maps inside this namespace.
# Otherwise the namespace in which the sidecar is running will be used. # Otherwise the namespace in which the sidecar is running will be used.
# It's also possible to specify ALL to search in all namespaces # It's also possible to specify ALL to search in all namespaces
@ -737,8 +838,20 @@ sidecar:
watchMethod: WATCH watchMethod: WATCH
# search in configmap, secret or both # search in configmap, secret or both
resource: both resource: both
# watchServerTimeout: request to the server, asking it to cleanly close the connection after that.
# defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S
# watchServerTimeout: 3600
#
# watchClientTimeout: is a client-side timeout, configuring your local socket.
# If you have a network outage dropping all packets with no RST/FIN,
# this is how long your client waits before realizing & dropping the connection.
# defaults to 66sec (sic!)
# watchClientTimeout: 60
#
# Endpoint to send request to reload datasources # Endpoint to send request to reload datasources
reloadURL: "http://localhost:3000/api/admin/provisioning/datasources/reload" reloadURL: "http://localhost:3000/api/admin/provisioning/datasources/reload"
# Absolute path to shell script to execute after a datasource got reloaded
script: null
skipReload: false skipReload: false
# Deploy the datasource sidecar as an initContainer in addition to a container. # Deploy the datasource sidecar as an initContainer in addition to a container.
# This is needed if skipReload is true, to load any datasources defined at startup time. # This is needed if skipReload is true, to load any datasources defined at startup time.
@ -747,10 +860,16 @@ sidecar:
sizeLimit: {} sizeLimit: {}
plugins: plugins:
enabled: false enabled: false
# Additional environment variables for the plugins sidecar
env: {}
# Do not reprocess already processed unchanged resources on k8s API reconnect.
# ignoreAlreadyProcessed: true
# label that the configmaps with plugins are marked with # label that the configmaps with plugins are marked with
label: grafana_plugin label: grafana_plugin
# value of label that the configmaps with plugins are set to # value of label that the configmaps with plugins are set to
labelValue: "" labelValue: ""
# Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
# logLevel: INFO
# If specified, the sidecar will search for plugin config-maps inside this namespace. # If specified, the sidecar will search for plugin config-maps inside this namespace.
# Otherwise the namespace in which the sidecar is running will be used. # Otherwise the namespace in which the sidecar is running will be used.
# It's also possible to specify ALL to search in all namespaces # It's also possible to specify ALL to search in all namespaces
@ -759,8 +878,20 @@ sidecar:
watchMethod: WATCH watchMethod: WATCH
# search in configmap, secret or both # search in configmap, secret or both
resource: both resource: both
# watchServerTimeout: request to the server, asking it to cleanly close the connection after that.
# defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S
# watchServerTimeout: 3600
#
# watchClientTimeout: is a client-side timeout, configuring your local socket.
# If you have a network outage dropping all packets with no RST/FIN,
# this is how long your client waits before realizing & dropping the connection.
# defaults to 66sec (sic!)
# watchClientTimeout: 60
#
# Endpoint to send request to reload plugins # Endpoint to send request to reload plugins
reloadURL: "http://localhost:3000/api/admin/provisioning/plugins/reload" reloadURL: "http://localhost:3000/api/admin/provisioning/plugins/reload"
# Absolute path to shell script to execute after a plugin got reloaded
script: null
skipReload: false skipReload: false
# Deploy the datasource sidecar as an initContainer in addition to a container. # Deploy the datasource sidecar as an initContainer in addition to a container.
# This is needed if skipReload is true, to load any plugins defined at startup time. # This is needed if skipReload is true, to load any plugins defined at startup time.
@ -769,8 +900,16 @@ sidecar:
sizeLimit: {} sizeLimit: {}
notifiers: notifiers:
enabled: false enabled: false
# Additional environment variables for the notifierssidecar
env: {}
# Do not reprocess already processed unchanged resources on k8s API reconnect.
# ignoreAlreadyProcessed: true
# label that the configmaps with notifiers are marked with # label that the configmaps with notifiers are marked with
label: grafana_notifier label: grafana_notifier
# value of label that the configmaps with notifiers are set to
labelValue: ""
# Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
# logLevel: INFO
# If specified, the sidecar will search for notifier config-maps inside this namespace. # If specified, the sidecar will search for notifier config-maps inside this namespace.
# Otherwise the namespace in which the sidecar is running will be used. # Otherwise the namespace in which the sidecar is running will be used.
# It's also possible to specify ALL to search in all namespaces # It's also possible to specify ALL to search in all namespaces
@ -824,6 +963,8 @@ imageRenderer:
# image-renderer service port used by both service and deployment # image-renderer service port used by both service and deployment
port: 8081 port: 8081
targetPort: 8081 targetPort: 8081
# Adds the appProtocol field to the image-renderer service. This allows to work with istio protocol selection. Ex: "http" or "tcp"
appProtocol: ""
# If https is enabled in Grafana, this needs to be set as 'https' to correctly configure the callback used in Grafana # If https is enabled in Grafana, this needs to be set as 'https' to correctly configure the callback used in Grafana
grafanaProtocol: http grafanaProtocol: http
# In case a sub_path is used this needs to be added to the image renderer callback # In case a sub_path is used this needs to be added to the image renderer callback

View File

@ -1,5 +1,5 @@
apiVersion: v2 apiVersion: v2
appVersion: 2.5.0 appVersion: 2.6.0
description: Install kube-state-metrics to generate and expose cluster-level metrics description: Install kube-state-metrics to generate and expose cluster-level metrics
home: https://github.com/kubernetes/kube-state-metrics/ home: https://github.com/kubernetes/kube-state-metrics/
keywords: keywords:
@ -18,4 +18,4 @@ name: kube-state-metrics
sources: sources:
- https://github.com/kubernetes/kube-state-metrics/ - https://github.com/kubernetes/kube-state-metrics/
type: application type: application
version: 4.15.0 version: 4.18.0

View File

@ -92,7 +92,9 @@ spec:
{{- if .Values.selfMonitor.telemetryHost }} {{- if .Values.selfMonitor.telemetryHost }}
- --telemetry-host={{ .Values.selfMonitor.telemetryHost }} - --telemetry-host={{ .Values.selfMonitor.telemetryHost }}
{{- end }} {{- end }}
{{- if .Values.selfMonitor.telemetryPort }}
- --telemetry-port={{ .Values.selfMonitor.telemetryPort | default 8081 }} - --telemetry-port={{ .Values.selfMonitor.telemetryPort | default 8081 }}
{{- end }}
{{- if or (.Values.kubeconfig.enabled) (.Values.volumeMounts) }} {{- if or (.Values.kubeconfig.enabled) (.Values.volumeMounts) }}
volumeMounts: volumeMounts:
{{- if .Values.kubeconfig.enabled }} {{- if .Values.kubeconfig.enabled }}
@ -105,7 +107,11 @@ spec:
{{- end }} {{- end }}
{{- end }} {{- end }}
imagePullPolicy: {{ .Values.image.pullPolicy }} imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- if .Values.image.sha }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}@sha256:{{ .Values.image.sha }}"
{{- else }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
{{- end }}
ports: ports:
- containerPort: {{ .Values.service.port | default 8080}} - containerPort: {{ .Values.service.port | default 8080}}
name: "http" name: "http"

View File

@ -2,7 +2,8 @@
prometheusScrape: true prometheusScrape: true
image: image:
repository: registry.k8s.io/kube-state-metrics/kube-state-metrics repository: registry.k8s.io/kube-state-metrics/kube-state-metrics
tag: v2.5.0 tag: v2.6.0
sha: ""
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
imagePullSecrets: [] imagePullSecrets: []

View File

@ -15,4 +15,4 @@ name: prometheus-node-exporter
sources: sources:
- https://github.com/prometheus/node_exporter/ - https://github.com/prometheus/node_exporter/
type: application type: application
version: 3.3.1 version: 4.2.0

View File

@ -1,17 +1,17 @@
# Prometheus Node Exporter # Prometheus `Node Exporter`
Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors. Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors.
This chart bootstraps a prometheus [Node Exporter](http://github.com/prometheus/node_exporter) deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. This chart bootstraps a prometheus [`Node Exporter`](http://github.com/prometheus/node_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
## Get Repo Info ## Get Repository Info
```console ```console
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update helm repo update
``` ```
_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._ _See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._
## Install Chart ## Install Chart
@ -19,7 +19,7 @@ _See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation
helm install [RELEASE_NAME] prometheus-community/prometheus-node-exporter helm install [RELEASE_NAME] prometheus-community/prometheus-node-exporter
``` ```
_See [configuration](#configuration) below._ _See [configuration](#configuring) below._
_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._
@ -41,6 +41,17 @@ helm upgrade [RELEASE_NAME] [CHART] --install
_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ _See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._
### 3.x to 4.x
Starting from version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade.
```console
kubectl delete daemonset -l app=prometheus-node-exporter
helm upgrade -i prometheus-node-exporter prometheus-community/prometheus-node-exporter
```
If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please ensure to upgrade their `selector` fields accordingly to the new labels.
### From 2.x to 3.x ### From 2.x to 3.x
Change the following: Change the following:

View File

@ -9,7 +9,7 @@
export SERVICE_IP=$(kubectl get svc --namespace {{ template "prometheus-node-exporter.namespace" . }} {{ template "prometheus-node-exporter.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') export SERVICE_IP=$(kubectl get svc --namespace {{ template "prometheus-node-exporter.namespace" . }} {{ template "prometheus-node-exporter.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
echo http://$SERVICE_IP:{{ .Values.service.port }} echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }} {{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ template "prometheus-node-exporter.namespace" . }} -l "app={{ template "prometheus-node-exporter.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") export POD_NAME=$(kubectl get pods --namespace {{ template "prometheus-node-exporter.namespace" . }} -l "app.kubernetes.io/name={{ template "prometheus-node-exporter.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
echo "Visit http://127.0.0.1:9100 to use your application" echo "Visit http://127.0.0.1:9100 to use your application"
kubectl port-forward --namespace {{ template "prometheus-node-exporter.namespace" . }} $POD_NAME 9100 kubectl port-forward --namespace {{ template "prometheus-node-exporter.namespace" . }} $POD_NAME 9100
{{- end }} {{- end }}

View File

@ -26,13 +26,28 @@ If release name contains chart name it will be used as a full name.
{{/* Generate basic labels */}} {{/* Generate basic labels */}}
{{- define "prometheus-node-exporter.labels" }} {{- define "prometheus-node-exporter.labels" }}
app: {{ template "prometheus-node-exporter.name" . }} helm.sh/chart: {{ template "prometheus-node-exporter.chart" . }}
heritage: {{.Release.Service }} app.kubernetes.io/managed-by: {{ .Release.Service }}
release: {{.Release.Name }} app.kubernetes.io/component: metrics
chart: {{ template "prometheus-node-exporter.chart" . }} app.kubernetes.io/part-of: {{ template "prometheus-node-exporter.name" . }}
{{- include "prometheus-node-exporter.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
{{- if .Values.podLabels}} {{- if .Values.podLabels}}
{{ toYaml .Values.podLabels }} {{ toYaml .Values.podLabels }}
{{- end }} {{- end }}
{{- if .Values.releaseLabel }}
release: {{ .Release.Name }}
{{- end }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "prometheus-node-exporter.selectorLabels" }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/name: {{ template "prometheus-node-exporter.name" . }}
{{- end }} {{- end }}
{{/* {{/*
@ -58,8 +73,12 @@ Create the name of the service account to use
The image to use The image to use
*/}} */}}
{{- define "prometheus-node-exporter.image" -}} {{- define "prometheus-node-exporter.image" -}}
{{- if .Values.image.sha -}}
{{- printf "%s:%s@%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.sha }}
{{- else -}}
{{- printf "%s:%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} {{- printf "%s:%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }}
{{- end }} {{- end }}
{{- end }}
{{/* {{/*
Allow the release namespace to be overridden for multi-namespace deployments in combined charts Allow the release namespace to be overridden for multi-namespace deployments in combined charts

View File

@ -7,8 +7,7 @@ metadata:
spec: spec:
selector: selector:
matchLabels: matchLabels:
app: {{ template "prometheus-node-exporter.name" . }} {{- include "prometheus-node-exporter.selectorLabels" . | indent 6 }}
release: {{ .Release.Name }}
{{- if .Values.updateStrategy }} {{- if .Values.updateStrategy }}
updateStrategy: updateStrategy:
{{ toYaml .Values.updateStrategy | indent 4 }} {{ toYaml .Values.updateStrategy | indent 4 }}
@ -147,13 +146,21 @@ spec:
{{- end }} {{- end }}
{{- if .Values.sidecars }} {{- if .Values.sidecars }}
{{ toYaml .Values.sidecars | indent 8 }} {{ toYaml .Values.sidecars | indent 8 }}
{{- if .Values.sidecarVolumeMount }} {{- if or .Values.sidecarVolumeMount .Values.sidecarHostVolumeMounts }}
volumeMounts: volumeMounts:
{{- range $_, $mount := .Values.sidecarVolumeMount }} {{- range $_, $mount := .Values.sidecarVolumeMount }}
- name: {{ $mount.name }} - name: {{ $mount.name }}
mountPath: {{ $mount.mountPath }} mountPath: {{ $mount.mountPath }}
readOnly: {{ $mount.readOnly }} readOnly: {{ $mount.readOnly }}
{{- end }} {{- end }}
{{- range $_, $mount := .Values.sidecarHostVolumeMounts }}
- name: {{ $mount.name }}
mountPath: {{ $mount.mountPath }}
readOnly: {{ $mount.readOnly }}
{{- if $mount.mountPropagation }}
mountPropagation: {{ $mount.mountPropagation }}
{{- end }}
{{- end }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if .Values.imagePullSecrets }} {{- if .Values.imagePullSecrets }}
@ -204,6 +211,13 @@ spec:
medium: Memory medium: Memory
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if .Values.sidecarHostVolumeMounts }}
{{- range $_, $mount := .Values.sidecarHostVolumeMounts }}
- name: {{ $mount.name }}
hostPath:
path: {{ $mount.hostPath }}
{{- end }}
{{- end }}
{{- if .Values.configmaps }} {{- if .Values.configmaps }}
{{- range $_, $mount := .Values.configmaps }} {{- range $_, $mount := .Values.configmaps }}
- name: {{ $mount.name }} - name: {{ $mount.name }}

View File

@ -4,8 +4,7 @@ kind: Endpoints
metadata: metadata:
name: {{ template "prometheus-node-exporter.fullname" . }} name: {{ template "prometheus-node-exporter.fullname" . }}
namespace: {{ template "prometheus-node-exporter.namespace" . }} namespace: {{ template "prometheus-node-exporter.namespace" . }}
labels: labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
{{ include "prometheus-node-exporter.labels" . | indent 4 }}
subsets: subsets:
- addresses: - addresses:
{{- range .Values.endpoints }} {{- range .Values.endpoints }}

View File

@ -3,11 +3,11 @@ kind: Service
metadata: metadata:
name: {{ template "prometheus-node-exporter.fullname" . }} name: {{ template "prometheus-node-exporter.fullname" . }}
namespace: {{ template "prometheus-node-exporter.namespace" . }} namespace: {{ template "prometheus-node-exporter.namespace" . }}
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
{{- if .Values.service.annotations }} {{- if .Values.service.annotations }}
annotations: annotations:
{{ toYaml .Values.service.annotations | indent 4 }} {{ toYaml .Values.service.annotations | indent 4 }}
{{- end }} {{- end }}
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
spec: spec:
type: {{ .Values.service.type }} type: {{ .Values.service.type }}
ports: ports:
@ -19,5 +19,4 @@ spec:
protocol: TCP protocol: TCP
name: {{ .Values.service.portName }} name: {{ .Values.service.portName }}
selector: selector:
app: {{ template "prometheus-node-exporter.name" . }} {{- include "prometheus-node-exporter.selectorLabels" . | indent 4 }}
release: {{ .Release.Name }}

View File

@ -5,11 +5,7 @@ kind: ServiceAccount
metadata: metadata:
name: {{ template "prometheus-node-exporter.serviceAccountName" . }} name: {{ template "prometheus-node-exporter.serviceAccountName" . }}
namespace: {{ template "prometheus-node-exporter.namespace" . }} namespace: {{ template "prometheus-node-exporter.namespace" . }}
labels: labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
app: {{ template "prometheus-node-exporter.name" . }}
chart: {{ template "prometheus-node-exporter.chart" . }}
release: "{{ .Release.Name }}"
heritage: "{{ .Release.Service }}"
annotations: annotations:
{{ toYaml .Values.serviceAccount.annotations | indent 4 }} {{ toYaml .Values.serviceAccount.annotations | indent 4 }}
imagePullSecrets: imagePullSecrets:

View File

@ -15,8 +15,7 @@ spec:
{{- if .Values.prometheus.monitor.selectorOverride }} {{- if .Values.prometheus.monitor.selectorOverride }}
{{ toYaml .Values.prometheus.monitor.selectorOverride | indent 6 }} {{ toYaml .Values.prometheus.monitor.selectorOverride | indent 6 }}
{{ else }} {{ else }}
app: {{ template "prometheus-node-exporter.name" . }} {{ include "prometheus-node-exporter.selectorLabels" . | indent 6 }}
release: {{ .Release.Name }}
{{- end }} {{- end }}
endpoints: endpoints:
- port: {{ .Values.service.portName }} - port: {{ .Values.service.portName }}

View File

@ -6,6 +6,7 @@ image:
# Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }} # Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }}
tag: "" tag: ""
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
sha: ""
imagePullSecrets: [] imagePullSecrets: []
# - name: "image-pull-secret" # - name: "image-pull-secret"
@ -139,6 +140,9 @@ podAnnotations:
# Extra labels to be added to node exporter pods # Extra labels to be added to node exporter pods
podLabels: {} podLabels: {}
## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box
releaseLabel: false
# Custom DNS configuration to be added to prometheus-node-exporter pods # Custom DNS configuration to be added to prometheus-node-exporter pods
dnsConfig: {} dnsConfig: {}
# nameservers: # nameservers:
@ -170,7 +174,7 @@ extraArgs: []
# - --collector.diskstats.ignored-devices=^(ram|loop|fd|(h|s|v)d[a-z]|nvme\\d+n\\d+p)\\d+$ # - --collector.diskstats.ignored-devices=^(ram|loop|fd|(h|s|v)d[a-z]|nvme\\d+n\\d+p)\\d+$
# - --collector.textfile.directory=/run/prometheus # - --collector.textfile.directory=/run/prometheus
## Additional mounts from the host ## Additional mounts from the host to node-exporter container
## ##
extraHostVolumeMounts: [] extraHostVolumeMounts: []
# - name: <mountName> # - name: <mountName>
@ -204,6 +208,15 @@ sidecarVolumeMount: []
## mountPath: /run/prometheus ## mountPath: /run/prometheus
## readOnly: false ## readOnly: false
## Additional mounts from the host to sidecar containers
##
sidecarHostVolumeMounts: []
# - name: <mountName>
# hostPath: <hostPath>
# mountPath: <mountPath>
# readOnly: true|false
# mountPropagation: None|HostToContainer|Bidirectional
## Additional InitContainers to initialize the pod ## Additional InitContainers to initialize the pod
## ##
extraInitContainers: [] extraInitContainers: []

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
@ -981,15 +981,365 @@ spec:
x-kubernetes-map-type: atomic x-kubernetes-map-type: atomic
alertmanagerConfiguration: alertmanagerConfiguration:
description: 'EXPERIMENTAL: alertmanagerConfiguration specifies the description: 'EXPERIMENTAL: alertmanagerConfiguration specifies the
global Alertmanager configuration. If defined, it takes precedence configuration of Alertmanager. If defined, it takes precedence over
over the `configSecret` field. This field may change in future releases.' the `configSecret` field. This field may change in future releases.'
properties: properties:
global:
description: Defines the global parameters of the Alertmanager
configuration.
properties:
httpConfig:
description: HTTP client configuration.
properties:
authorization:
description: Authorization header configuration for the
client. This is mutually exclusive with BasicAuth and
is only available starting from Alertmanager v0.22+.
properties:
credentials:
description: The secret's key that contains the credentials
of the request
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined, BasicAuth
takes precedence.
properties:
password:
description: The secret in the service monitor namespace
that contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
username:
description: The secret in the service monitor namespace
that contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
bearerTokenSecret:
description: The secret's key that contains the bearer
token to be used by the client for authentication. The
secret needs to be in the same namespace as the Alertmanager
object and accessible by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
followRedirects:
description: FollowRedirects specifies whether the client
should follow HTTP 3xx redirects.
type: boolean
oauth2:
description: OAuth2 client credentials used to fetch a
token for the targets.
properties:
clientId:
description: The secret or configmap containing the
OAuth2 client id
properties:
configMap:
description: ConfigMap containing data to use
for the targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap
or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
secret:
description: Secret containing data to use for
the targets.
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
clientSecret:
description: The secret containing the OAuth2 client
secret
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
endpointParams:
additionalProperties:
type: string
description: Parameters to append to the token URL
type: object
scopes:
description: OAuth2 scopes used for the token request
items:
type: string
type: array
tokenUrl:
description: The URL to fetch the token from
minLength: 1
type: string
required:
- clientId
- clientSecret
- tokenUrl
type: object
proxyURL:
description: Optional proxy URL.
type: string
tlsConfig:
description: TLS configuration for the client.
properties:
ca:
description: Struct containing the CA cert to use
for the targets.
properties:
configMap:
description: ConfigMap containing data to use
for the targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap
or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
secret:
description: Secret containing data to use for
the targets.
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
cert:
description: Struct containing the client cert file
for the targets.
properties:
configMap:
description: ConfigMap containing data to use
for the targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap
or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
secret:
description: Secret containing data to use for
the targets.
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keySecret:
description: Secret containing the client key file
for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
type: object
resolveTimeout:
description: ResolveTimeout is the default value used by alertmanager
if the alert does not include EndsAt, after this time passes
it can declare the alert as resolved if it has not been
updated. This has no impact on alerts from Prometheus, as
they always include EndsAt.
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
type: object
name: name:
description: The name of the AlertmanagerConfig resource which description: The name of the AlertmanagerConfig resource which
is used to generate the global configuration. It must be defined is used to generate the Alertmanager configuration. It must
in the same namespace as the Alertmanager object. The operator be defined in the same namespace as the Alertmanager object.
will not enforce a `namespace` label for routes and inhibition The operator will not enforce a `namespace` label for routes
rules. and inhibition rules.
minLength: 1 minLength: 1
type: string type: string
type: object type: object
@ -1595,13 +1945,13 @@ spec:
Cannot be updated. Cannot be updated.
type: string type: string
ports: ports:
description: List of ports to expose from the container. Exposing description: List of ports to expose from the container. Not
a port here gives the system additional information about specifying a port here DOES NOT prevent that port from being
the network connections a container uses, but is primarily exposed. Any port which is listening on the default "0.0.0.0"
informational. Not specifying a port here DOES NOT prevent address inside a container will be accessible from the network.
that port from being exposed. Any port which is listening Modifying this array with strategic merge patch may corrupt
on the default "0.0.0.0" address inside a container will be the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
accessible from the network. Cannot be updated. Cannot be updated.
items: items:
description: ContainerPort represents a network port in a description: ContainerPort represents a network port in a
single container. single container.
@ -2880,13 +3230,13 @@ spec:
Cannot be updated. Cannot be updated.
type: string type: string
ports: ports:
description: List of ports to expose from the container. Exposing description: List of ports to expose from the container. Not
a port here gives the system additional information about specifying a port here DOES NOT prevent that port from being
the network connections a container uses, but is primarily exposed. Any port which is listening on the default "0.0.0.0"
informational. Not specifying a port here DOES NOT prevent address inside a container will be accessible from the network.
that port from being exposed. Any port which is listening Modifying this array with strategic merge patch may corrupt
on the default "0.0.0.0" address inside a container will be the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
accessible from the network. Cannot be updated. Cannot be updated.
items: items:
description: ContainerPort represents a network port in a description: ContainerPort represents a network port in a
single container. single container.
@ -4509,6 +4859,19 @@ spec:
type: object type: object
type: object type: object
x-kubernetes-map-type: atomic x-kubernetes-map-type: atomic
matchLabelKeys:
description: MatchLabelKeys is a set of pod label keys to select
the pods over which spreading will be calculated. The keys
are used to lookup values from the incoming pod labels, those
key-value labels are ANDed with labelSelector to select the
group of existing pods over which spreading will be calculated
for the incoming pod. Keys that don't exist in the incoming
pod labels will be ignored. A null or empty list means only
match against labelSelector.
items:
type: string
type: array
x-kubernetes-list-type: atomic
maxSkew: maxSkew:
description: 'MaxSkew describes the degree to which pods may description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`,
@ -4549,11 +4912,31 @@ spec:
minimum\" is treated as 0. In this situation, new pod with minimum\" is treated as 0. In this situation, new pod with
the same labelSelector cannot be scheduled, because computed the same labelSelector cannot be scheduled, because computed
skew will be 3(3 - 0) if new Pod is scheduled to any of the skew will be 3(3 - 0) if new Pod is scheduled to any of the
three zones, it will violate MaxSkew. \n This is an alpha three zones, it will violate MaxSkew. \n This is a beta field
field and requires enabling MinDomainsInPodTopologySpread and requires the MinDomainsInPodTopologySpread feature gate
feature gate." to be enabled (enabled by default)."
format: int32 format: int32
type: integer type: integer
nodeAffinityPolicy:
description: "NodeAffinityPolicy indicates how we will treat
Pod's nodeAffinity/nodeSelector when calculating pod topology
spread skew. Options are: - Honor: only nodes matching nodeAffinity/nodeSelector
are included in the calculations. - Ignore: nodeAffinity/nodeSelector
are ignored. All nodes are included in the calculations. \n
If this value is nil, the behavior is equivalent to the Honor
policy. This is a alpha-level feature enabled by the NodeInclusionPolicyInPodTopologySpread
feature flag."
type: string
nodeTaintsPolicy:
description: "NodeTaintsPolicy indicates how we will treat node
taints when calculating pod topology spread skew. Options
are: - Honor: nodes without taints, along with tainted nodes
for which the incoming pod has a toleration, are included.
- Ignore: node taints are ignored. All nodes are included.
\n If this value is nil, the behavior is equivalent to the
Ignore policy. This is a alpha-level feature enabled by the
NodeInclusionPolicyInPodTopologySpread feature flag."
type: string
topologyKey: topologyKey:
description: TopologyKey is the key of node labels. Nodes that description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered have a label with this key and identical values are considered
@ -4561,10 +4944,11 @@ spec:
as a "bucket", and try to put balanced number of pods into as a "bucket", and try to put balanced number of pods into
each bucket. We define a domain as a particular instance of each bucket. We define a domain as a particular instance of
a topology. Also, we define an eligible domain as a domain a topology. Also, we define an eligible domain as a domain
whose nodes match the node selector. e.g. If TopologyKey is whose nodes meet the requirements of nodeAffinityPolicy and
"kubernetes.io/hostname", each Node is a domain of that topology. nodeTaintsPolicy. e.g. If TopologyKey is "kubernetes.io/hostname",
And, if TopologyKey is "topology.kubernetes.io/zone", each each Node is a domain of that topology. And, if TopologyKey
zone is a domain of that topology. It's a required field. is "topology.kubernetes.io/zone", each zone is a domain of
that topology. It's a required field.
type: string type: string
whenUnsatisfiable: whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a description: 'WhenUnsatisfiable indicates how to deal with a
@ -6153,8 +6537,55 @@ spec:
web: web:
description: Defines the web command line flags when starting Alertmanager. description: Defines the web command line flags when starting Alertmanager.
properties: properties:
httpConfig:
description: Defines HTTP parameters for web server.
properties:
headers:
description: List of headers that can be added to HTTP responses.
properties:
contentSecurityPolicy:
description: Set the Content-Security-Policy header to
HTTP responses. Unset if blank.
type: string
strictTransportSecurity:
description: Set the Strict-Transport-Security header
to HTTP responses. Unset if blank. Please make sure
that you use this with care as this header might force
browsers to load Prometheus and the other applications
hosted on the same domain and subdomains over HTTPS.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security
type: string
xContentTypeOptions:
description: Set the X-Content-Type-Options header to
HTTP responses. Unset if blank. Accepted value is nosniff.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options
enum:
- ""
- NoSniff
type: string
xFrameOptions:
description: Set the X-Frame-Options header to HTTP responses.
Unset if blank. Accepted values are deny and sameorigin.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
enum:
- ""
- Deny
- SameOrigin
type: string
xXSSProtection:
description: Set the X-XSS-Protection header to all responses.
Unset if blank. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection
type: string
type: object
http2:
description: Enable HTTP/2 support. Note that HTTP/2 is only
supported with TLS. When TLSConfig is not configured, HTTP/2
will be disabled. Whenever the value of the field changes,
a rolling update will be triggered.
type: boolean
type: object
tlsConfig: tlsConfig:
description: WebTLSConfig defines the TLS parameters for HTTPS. description: Defines the TLS parameters for HTTPS.
properties: properties:
cert: cert:
description: Contains the TLS certificate for the server. description: Contains the TLS certificate for the server.

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
@ -110,6 +110,31 @@ spec:
- key - key
type: object type: object
x-kubernetes-map-type: atomic x-kubernetes-map-type: atomic
additionalArgs:
description: AdditionalArgs allows setting additional arguments for
the Prometheus container. It is intended for e.g. activating hidden
flags which are not supported by the dedicated configuration options
yet. The arguments are passed as-is to the Prometheus container
which may cause issues if they are invalid or not supporeted by
the given Prometheus version. In case of an argument conflict (e.g.
an argument which is already set by the operator itself) or when
providing an invalid argument the reconciliation will fail and an
error will be logged.
items:
description: Argument as part of the AdditionalArgs list.
properties:
name:
description: Name of the argument, e.g. "scrape.discovery-reload-interval".
minLength: 1
type: string
value:
description: Argument value, e.g. 30s. Can be empty for name-only
arguments (e.g. --storage.tsdb.no-lockfile)
type: string
required:
- name
type: object
type: array
additionalScrapeConfigs: additionalScrapeConfigs:
description: 'AdditionalScrapeConfigs allows specifying a key of a description: 'AdditionalScrapeConfigs allows specifying a key of a
Secret containing additional Prometheus scrape configurations. Scrape Secret containing additional Prometheus scrape configurations. Scrape
@ -2004,13 +2029,13 @@ spec:
Cannot be updated. Cannot be updated.
type: string type: string
ports: ports:
description: List of ports to expose from the container. Exposing description: List of ports to expose from the container. Not
a port here gives the system additional information about specifying a port here DOES NOT prevent that port from being
the network connections a container uses, but is primarily exposed. Any port which is listening on the default "0.0.0.0"
informational. Not specifying a port here DOES NOT prevent address inside a container will be accessible from the network.
that port from being exposed. Any port which is listening Modifying this array with strategic merge patch may corrupt
on the default "0.0.0.0" address inside a container will be the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
accessible from the network. Cannot be updated. Cannot be updated.
items: items:
description: ContainerPort represents a network port in a description: ContainerPort represents a network port in a
single container. single container.
@ -3440,13 +3465,13 @@ spec:
Cannot be updated. Cannot be updated.
type: string type: string
ports: ports:
description: List of ports to expose from the container. Exposing description: List of ports to expose from the container. Not
a port here gives the system additional information about specifying a port here DOES NOT prevent that port from being
the network connections a container uses, but is primarily exposed. Any port which is listening on the default "0.0.0.0"
informational. Not specifying a port here DOES NOT prevent address inside a container will be accessible from the network.
that port from being exposed. Any port which is listening Modifying this array with strategic merge patch may corrupt
on the default "0.0.0.0" address inside a container will be the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
accessible from the network. Cannot be updated. Cannot be updated.
items: items:
description: ContainerPort represents a network port in a description: ContainerPort represents a network port in a
single container. single container.
@ -6347,6 +6372,29 @@ spec:
notice in any release. \n This is experimental and may change significantly notice in any release. \n This is experimental and may change significantly
without backward compatibility in any release." without backward compatibility in any release."
properties: properties:
additionalArgs:
description: AdditionalArgs allows setting additional arguments
for the Thanos container. The arguments are passed as-is to
the Thanos container which may cause issues if they are invalid
or not supporeted the given Thanos version. In case of an argument
conflict (e.g. an argument which is already set by the operator
itself) or when providing an invalid argument the reconciliation
will fail and an error will be logged.
items:
description: Argument as part of the AdditionalArgs list.
properties:
name:
description: Name of the argument, e.g. "scrape.discovery-reload-interval".
minLength: 1
type: string
value:
description: Argument value, e.g. 30s. Can be empty for
name-only arguments (e.g. --storage.tsdb.no-lockfile)
type: string
required:
- name
type: object
type: array
baseImage: baseImage:
description: 'Thanos base image if other than default. Deprecated: description: 'Thanos base image if other than default. Deprecated:
use ''image'' instead' use ''image'' instead'
@ -6756,6 +6804,19 @@ spec:
type: object type: object
type: object type: object
x-kubernetes-map-type: atomic x-kubernetes-map-type: atomic
matchLabelKeys:
description: MatchLabelKeys is a set of pod label keys to select
the pods over which spreading will be calculated. The keys
are used to lookup values from the incoming pod labels, those
key-value labels are ANDed with labelSelector to select the
group of existing pods over which spreading will be calculated
for the incoming pod. Keys that don't exist in the incoming
pod labels will be ignored. A null or empty list means only
match against labelSelector.
items:
type: string
type: array
x-kubernetes-list-type: atomic
maxSkew: maxSkew:
description: 'MaxSkew describes the degree to which pods may description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`,
@ -6796,11 +6857,31 @@ spec:
minimum\" is treated as 0. In this situation, new pod with minimum\" is treated as 0. In this situation, new pod with
the same labelSelector cannot be scheduled, because computed the same labelSelector cannot be scheduled, because computed
skew will be 3(3 - 0) if new Pod is scheduled to any of the skew will be 3(3 - 0) if new Pod is scheduled to any of the
three zones, it will violate MaxSkew. \n This is an alpha three zones, it will violate MaxSkew. \n This is a beta field
field and requires enabling MinDomainsInPodTopologySpread and requires the MinDomainsInPodTopologySpread feature gate
feature gate." to be enabled (enabled by default)."
format: int32 format: int32
type: integer type: integer
nodeAffinityPolicy:
description: "NodeAffinityPolicy indicates how we will treat
Pod's nodeAffinity/nodeSelector when calculating pod topology
spread skew. Options are: - Honor: only nodes matching nodeAffinity/nodeSelector
are included in the calculations. - Ignore: nodeAffinity/nodeSelector
are ignored. All nodes are included in the calculations. \n
If this value is nil, the behavior is equivalent to the Honor
policy. This is a alpha-level feature enabled by the NodeInclusionPolicyInPodTopologySpread
feature flag."
type: string
nodeTaintsPolicy:
description: "NodeTaintsPolicy indicates how we will treat node
taints when calculating pod topology spread skew. Options
are: - Honor: nodes without taints, along with tainted nodes
for which the incoming pod has a toleration, are included.
- Ignore: node taints are ignored. All nodes are included.
\n If this value is nil, the behavior is equivalent to the
Ignore policy. This is a alpha-level feature enabled by the
NodeInclusionPolicyInPodTopologySpread feature flag."
type: string
topologyKey: topologyKey:
description: TopologyKey is the key of node labels. Nodes that description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered have a label with this key and identical values are considered
@ -6808,10 +6889,11 @@ spec:
as a "bucket", and try to put balanced number of pods into as a "bucket", and try to put balanced number of pods into
each bucket. We define a domain as a particular instance of each bucket. We define a domain as a particular instance of
a topology. Also, we define an eligible domain as a domain a topology. Also, we define an eligible domain as a domain
whose nodes match the node selector. e.g. If TopologyKey is whose nodes meet the requirements of nodeAffinityPolicy and
"kubernetes.io/hostname", each Node is a domain of that topology. nodeTaintsPolicy. e.g. If TopologyKey is "kubernetes.io/hostname",
And, if TopologyKey is "topology.kubernetes.io/zone", each each Node is a domain of that topology. And, if TopologyKey
zone is a domain of that topology. It's a required field. is "topology.kubernetes.io/zone", each zone is a domain of
that topology. It's a required field.
type: string type: string
whenUnsatisfiable: whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a description: 'WhenUnsatisfiable indicates how to deal with a
@ -8404,11 +8486,58 @@ spec:
web: web:
description: Defines the web command line flags when starting Prometheus. description: Defines the web command line flags when starting Prometheus.
properties: properties:
httpConfig:
description: Defines HTTP parameters for web server.
properties:
headers:
description: List of headers that can be added to HTTP responses.
properties:
contentSecurityPolicy:
description: Set the Content-Security-Policy header to
HTTP responses. Unset if blank.
type: string
strictTransportSecurity:
description: Set the Strict-Transport-Security header
to HTTP responses. Unset if blank. Please make sure
that you use this with care as this header might force
browsers to load Prometheus and the other applications
hosted on the same domain and subdomains over HTTPS.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security
type: string
xContentTypeOptions:
description: Set the X-Content-Type-Options header to
HTTP responses. Unset if blank. Accepted value is nosniff.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options
enum:
- ""
- NoSniff
type: string
xFrameOptions:
description: Set the X-Frame-Options header to HTTP responses.
Unset if blank. Accepted values are deny and sameorigin.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
enum:
- ""
- Deny
- SameOrigin
type: string
xXSSProtection:
description: Set the X-XSS-Protection header to all responses.
Unset if blank. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection
type: string
type: object
http2:
description: Enable HTTP/2 support. Note that HTTP/2 is only
supported with TLS. When TLSConfig is not configured, HTTP/2
will be disabled. Whenever the value of the field changes,
a rolling update will be triggered.
type: boolean
type: object
pageTitle: pageTitle:
description: The prometheus web page title description: The prometheus web page title
type: string type: string
tlsConfig: tlsConfig:
description: WebTLSConfig defines the TLS parameters for HTTPS. description: Defines the TLS parameters for HTTPS.
properties: properties:
cert: cert:
description: Contains the TLS certificate for the server. description: Contains the TLS certificate for the server.

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition

View File

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
--- ---
apiVersion: apiextensions.k8s.io/v1 apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition kind: CustomResourceDefinition
@ -1508,13 +1508,13 @@ spec:
Cannot be updated. Cannot be updated.
type: string type: string
ports: ports:
description: List of ports to expose from the container. Exposing description: List of ports to expose from the container. Not
a port here gives the system additional information about specifying a port here DOES NOT prevent that port from being
the network connections a container uses, but is primarily exposed. Any port which is listening on the default "0.0.0.0"
informational. Not specifying a port here DOES NOT prevent address inside a container will be accessible from the network.
that port from being exposed. Any port which is listening Modifying this array with strategic merge patch may corrupt
on the default "0.0.0.0" address inside a container will be the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
accessible from the network. Cannot be updated. Cannot be updated.
items: items:
description: ContainerPort represents a network port in a description: ContainerPort represents a network port in a
single container. single container.
@ -2955,13 +2955,13 @@ spec:
Cannot be updated. Cannot be updated.
type: string type: string
ports: ports:
description: List of ports to expose from the container. Exposing description: List of ports to expose from the container. Not
a port here gives the system additional information about specifying a port here DOES NOT prevent that port from being
the network connections a container uses, but is primarily exposed. Any port which is listening on the default "0.0.0.0"
informational. Not specifying a port here DOES NOT prevent address inside a container will be accessible from the network.
that port from being exposed. Any port which is listening Modifying this array with strategic merge patch may corrupt
on the default "0.0.0.0" address inside a container will be the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
accessible from the network. Cannot be updated. Cannot be updated.
items: items:
description: ContainerPort represents a network port in a description: ContainerPort represents a network port in a
single container. single container.
@ -4731,6 +4731,19 @@ spec:
type: object type: object
type: object type: object
x-kubernetes-map-type: atomic x-kubernetes-map-type: atomic
matchLabelKeys:
description: MatchLabelKeys is a set of pod label keys to select
the pods over which spreading will be calculated. The keys
are used to lookup values from the incoming pod labels, those
key-value labels are ANDed with labelSelector to select the
group of existing pods over which spreading will be calculated
for the incoming pod. Keys that don't exist in the incoming
pod labels will be ignored. A null or empty list means only
match against labelSelector.
items:
type: string
type: array
x-kubernetes-list-type: atomic
maxSkew: maxSkew:
description: 'MaxSkew describes the degree to which pods may description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`,
@ -4771,11 +4784,31 @@ spec:
minimum\" is treated as 0. In this situation, new pod with minimum\" is treated as 0. In this situation, new pod with
the same labelSelector cannot be scheduled, because computed the same labelSelector cannot be scheduled, because computed
skew will be 3(3 - 0) if new Pod is scheduled to any of the skew will be 3(3 - 0) if new Pod is scheduled to any of the
three zones, it will violate MaxSkew. \n This is an alpha three zones, it will violate MaxSkew. \n This is a beta field
field and requires enabling MinDomainsInPodTopologySpread and requires the MinDomainsInPodTopologySpread feature gate
feature gate." to be enabled (enabled by default)."
format: int32 format: int32
type: integer type: integer
nodeAffinityPolicy:
description: "NodeAffinityPolicy indicates how we will treat
Pod's nodeAffinity/nodeSelector when calculating pod topology
spread skew. Options are: - Honor: only nodes matching nodeAffinity/nodeSelector
are included in the calculations. - Ignore: nodeAffinity/nodeSelector
are ignored. All nodes are included in the calculations. \n
If this value is nil, the behavior is equivalent to the Honor
policy. This is a alpha-level feature enabled by the NodeInclusionPolicyInPodTopologySpread
feature flag."
type: string
nodeTaintsPolicy:
description: "NodeTaintsPolicy indicates how we will treat node
taints when calculating pod topology spread skew. Options
are: - Honor: nodes without taints, along with tainted nodes
for which the incoming pod has a toleration, are included.
- Ignore: node taints are ignored. All nodes are included.
\n If this value is nil, the behavior is equivalent to the
Ignore policy. This is a alpha-level feature enabled by the
NodeInclusionPolicyInPodTopologySpread feature flag."
type: string
topologyKey: topologyKey:
description: TopologyKey is the key of node labels. Nodes that description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered have a label with this key and identical values are considered
@ -4783,10 +4816,11 @@ spec:
as a "bucket", and try to put balanced number of pods into as a "bucket", and try to put balanced number of pods into
each bucket. We define a domain as a particular instance of each bucket. We define a domain as a particular instance of
a topology. Also, we define an eligible domain as a domain a topology. Also, we define an eligible domain as a domain
whose nodes match the node selector. e.g. If TopologyKey is whose nodes meet the requirements of nodeAffinityPolicy and
"kubernetes.io/hostname", each Node is a domain of that topology. nodeTaintsPolicy. e.g. If TopologyKey is "kubernetes.io/hostname",
And, if TopologyKey is "topology.kubernetes.io/zone", each each Node is a domain of that topology. And, if TopologyKey
zone is a domain of that topology. It's a required field. is "topology.kubernetes.io/zone", each zone is a domain of
that topology. It's a required field.
type: string type: string
whenUnsatisfiable: whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a description: 'WhenUnsatisfiable indicates how to deal with a
@ -4832,6 +4866,11 @@ spec:
- key - key
type: object type: object
x-kubernetes-map-type: atomic x-kubernetes-map-type: atomic
tracingConfigFile:
description: TracingConfig specifies the path of the tracing configuration
file. When used alongside with TracingConfig, TracingConfigFile
takes precedence.
type: string
volumes: volumes:
description: Volumes allows configuration of additional volumes on description: Volumes allows configuration of additional volumes on
the output StatefulSet definition. Volumes specified will be appended the output StatefulSet definition. Volumes specified will be appended

View File

@ -42,6 +42,10 @@ spec:
- --host={{ template "kube-prometheus-stack.operator.fullname" . }},{{ template "kube-prometheus-stack.operator.fullname" . }}.{{ template "kube-prometheus-stack.namespace" . }}.svc - --host={{ template "kube-prometheus-stack.operator.fullname" . }},{{ template "kube-prometheus-stack.operator.fullname" . }}.{{ template "kube-prometheus-stack.namespace" . }}.svc
- --namespace={{ template "kube-prometheus-stack.namespace" . }} - --namespace={{ template "kube-prometheus-stack.namespace" . }}
- --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission - --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission
{{- with .Values.prometheusOperator.admissionWebhooks.createSecretJob }}
securityContext:
{{ toYaml .securityContext | nindent 12 }}
{{- end }}
resources: resources:
{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }} {{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }}
restartPolicy: OnFailure restartPolicy: OnFailure

View File

@ -43,6 +43,10 @@ spec:
- --namespace={{ template "kube-prometheus-stack.namespace" . }} - --namespace={{ template "kube-prometheus-stack.namespace" . }}
- --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission - --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission
- --patch-failure-policy={{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }} - --patch-failure-policy={{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }}
{{- with .Values.prometheusOperator.admissionWebhooks.patchWebhookJob }}
securityContext:
{{ toYaml .securityContext | nindent 12 }}
{{- end }}
resources: resources:
{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }} {{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }}
restartPolicy: OnFailure restartPolicy: OnFailure

View File

@ -12,5 +12,5 @@ metadata:
app: {{ template "kube-prometheus-stack.name" . }}-prometheus-am-confg app: {{ template "kube-prometheus-stack.name" . }}-prometheus-am-confg
{{ include "kube-prometheus-stack.labels" . | indent 4 }} {{ include "kube-prometheus-stack.labels" . | indent 4 }}
data: data:
additional-alertmanager-configs.yaml: {{ toYaml .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs | b64enc | quote }} additional-alertmanager-configs.yaml: {{ tpl (toYaml .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs) . | b64enc | quote }}
{{- end }} {{- end }}

View File

@ -112,8 +112,10 @@ spec:
{{- if .Values.prometheus.prometheusSpec.retentionSize }} {{- if .Values.prometheus.prometheusSpec.retentionSize }}
retentionSize: {{ .Values.prometheus.prometheusSpec.retentionSize | quote }} retentionSize: {{ .Values.prometheus.prometheusSpec.retentionSize | quote }}
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.walCompression }} {{- if eq .Values.prometheus.prometheusSpec.walCompression false }}
walCompression: {{ .Values.prometheus.prometheusSpec.walCompression }} walCompression: false
{{ else }}
walCompression: true
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.routePrefix }} {{- if .Values.prometheus.prometheusSpec.routePrefix }}
routePrefix: {{ .Values.prometheus.prometheusSpec.routePrefix | quote }} routePrefix: {{ .Values.prometheus.prometheusSpec.routePrefix | quote }}
@ -354,7 +356,7 @@ spec:
name: "{{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) . | trunc 63 | trimSuffix "-" }}" name: "{{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) . | trunc 63 | trimSuffix "-" }}"
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.excludedFromEnforcement }} {{- if .Values.prometheus.prometheusSpec.excludedFromEnforcement }}
{{ toYaml .Values.prometheus.prometheusSpec.excludedFromEnforcement | indent 4 }} {{ tpl (toYaml .Values.prometheus.prometheusSpec.excludedFromEnforcement | indent 4) . }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.queryLogFile }} {{- if .Values.prometheus.prometheusSpec.queryLogFile }}

View File

@ -1530,6 +1530,7 @@ prometheus-node-exporter:
## Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards ## Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards
## ##
jobLabel: node-exporter jobLabel: node-exporter
releaseLabel: true
extraArgs: extraArgs:
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
@ -1610,7 +1611,7 @@ prometheusOperator:
enabled: true enabled: true
image: image:
repository: k8s.gcr.io/ingress-nginx/kube-webhook-certgen repository: k8s.gcr.io/ingress-nginx/kube-webhook-certgen
tag: v1.2.0 tag: v1.3.0
sha: "" sha: ""
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
resources: {} resources: {}
@ -1631,6 +1632,14 @@ prometheusOperator:
runAsNonRoot: true runAsNonRoot: true
runAsUser: 2000 runAsUser: 2000
# Security context for create job container
createSecretJob:
securityContext: {}
# Security context for patch job container
patchWebhookJob:
securityContext: {}
# Use certmanager to generate webhook certs # Use certmanager to generate webhook certs
certManager: certManager:
enabled: false enabled: false
@ -1841,7 +1850,7 @@ prometheusOperator:
## ##
image: image:
repository: quay.io/prometheus-operator/prometheus-operator repository: quay.io/prometheus-operator/prometheus-operator
tag: v0.58.0 tag: v0.59.1
sha: "" sha: ""
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
@ -1856,10 +1865,9 @@ prometheusOperator:
## Prometheus-config-reloader ## Prometheus-config-reloader
## ##
prometheusConfigReloader: prometheusConfigReloader:
# image to use for config and rule reloading
image: image:
repository: quay.io/prometheus-operator/prometheus-config-reloader repository: quay.io/prometheus-operator/prometheus-config-reloader
tag: v0.58.0 tag: v0.59.1
sha: "" sha: ""
# resource config for prometheusConfigReloader # resource config for prometheusConfigReloader
@ -1875,7 +1883,7 @@ prometheusOperator:
## ##
thanosImage: thanosImage:
repository: quay.io/thanos/thanos repository: quay.io/thanos/thanos
tag: v0.27.0 tag: v0.28.0
sha: "" sha: ""
## Set a Field Selector to filter watched secrets ## Set a Field Selector to filter watched secrets
@ -2308,7 +2316,7 @@ prometheus:
## ##
image: image:
repository: quay.io/prometheus/prometheus repository: quay.io/prometheus/prometheus
tag: v2.37.0 tag: v2.38.0
sha: "" sha: ""
## Tolerations for use with node taints ## Tolerations for use with node taints
@ -2500,7 +2508,7 @@ prometheus:
## Enable compression of the write-ahead log using Snappy. ## Enable compression of the write-ahead log using Snappy.
## ##
walCompression: false walCompression: true
## If true, the Operator won't process any Prometheus configuration changes ## If true, the Operator won't process any Prometheus configuration changes
## ##
@ -3139,7 +3147,7 @@ thanosRuler:
## ##
image: image:
repository: quay.io/thanos/thanos repository: quay.io/thanos/thanos
tag: v0.27.0 tag: v0.28.0
sha: "" sha: ""
## Namespaces to be selected for PrometheusRules discovery. ## Namespaces to be selected for PrometheusRules discovery.

View File

@ -56,7 +56,11 @@
}, },
"gridPos": { }, "gridPos": { },
"id": 3, "id": 3,
"interval": null, "interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ], "links": [ ],
"mappingType": 1, "mappingType": 1,
"mappingTypes": [ "mappingTypes": [
@ -128,13 +132,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -236,7 +241,11 @@
}, },
"gridPos": { }, "gridPos": { },
"id": 5, "id": 5,
"interval": null, "interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ], "links": [ ],
"mappingType": 1, "mappingType": 1,
"mappingTypes": [ "mappingTypes": [
@ -307,13 +316,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -406,13 +416,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -488,13 +499,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -516,7 +528,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}", "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{ resource }}", "legendFormat": "{{ resource }}",
@ -594,7 +606,11 @@
}, },
"gridPos": { }, "gridPos": { },
"id": 9, "id": 9,
"interval": null, "interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ], "links": [ ],
"mappingType": 1, "mappingType": 1,
"mappingTypes": [ "mappingTypes": [
@ -665,13 +681,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -764,13 +781,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 11, "id": 11,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -846,13 +864,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 12, "id": 12,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -874,7 +893,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}", "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{ resource }}", "legendFormat": "{{ resource }}",
@ -940,13 +959,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 13, "id": 13,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": false, "show": false,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -968,7 +988,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)", "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}", "legendFormat": "{{instance}} {{name}}",
@ -1021,13 +1041,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 14, "id": 14,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": false, "show": false,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -1049,7 +1070,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)", "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}", "legendFormat": "{{instance}} {{name}}",
@ -1102,6 +1123,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 15, "id": 15,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -1130,7 +1152,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))", "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}", "legendFormat": "{{instance}} {{name}}",
@ -1196,13 +1218,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 16, "id": 16,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -1277,13 +1300,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 17, "id": 17,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -1305,7 +1329,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])", "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -1358,13 +1382,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 18, "id": 18,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -1452,7 +1477,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",
@ -1470,7 +1495,7 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(apiserver_request_total, cluster)", "query": "label_values(up{job=\"apiserver\"}, cluster)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -1490,7 +1515,7 @@
"multi": false, "multi": false,
"name": "instance", "name": "instance",
"options": [ ], "options": [ ],
"query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)", "query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -1619,7 +1619,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",

View File

@ -36,7 +36,11 @@
}, },
"gridPos": { }, "gridPos": { },
"id": 2, "id": 2,
"interval": null, "interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ], "links": [ ],
"mappingType": 1, "mappingType": 1,
"mappingTypes": [ "mappingTypes": [
@ -106,6 +110,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -134,7 +139,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)", "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}", "legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -200,6 +205,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -228,7 +234,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)", "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}", "legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -294,6 +300,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 5, "id": 5,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -322,7 +329,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))", "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}", "legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -388,13 +395,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -416,28 +424,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "2xx", "legendFormat": "2xx",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "3xx", "legendFormat": "3xx",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "4xx", "legendFormat": "4xx",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "5xx", "legendFormat": "5xx",
@ -490,13 +498,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -518,7 +527,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}", "legendFormat": "{{verb}} {{url}}",
@ -584,6 +593,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -612,7 +622,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}", "legendFormat": "{{verb}} {{url}}",
@ -678,13 +688,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 9, "id": 9,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -759,13 +770,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -787,7 +799,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -840,13 +852,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 11, "id": 11,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -934,7 +947,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",

View File

@ -24,10 +24,12 @@
"id": 1, "id": 1,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -47,7 +49,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))", "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -99,11 +101,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 2, "id": 2,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -123,7 +128,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})", "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -175,11 +180,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -199,7 +207,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})", "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -251,11 +259,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -275,7 +286,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{cluster=\"$cluster\"})", "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -327,11 +338,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 5, "id": 5,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -351,7 +365,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})", "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -403,11 +417,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -427,7 +444,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})", "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -490,11 +507,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -578,11 +598,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -741,7 +764,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)", "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -861,11 +884,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 9, "id": 9,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -885,7 +911,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -949,11 +975,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1112,7 +1141,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)", "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1130,7 +1159,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1148,7 +1177,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1166,7 +1195,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1234,10 +1263,12 @@
"id": 11, "id": 11,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1381,7 +1412,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1390,7 +1421,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1399,7 +1430,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1408,7 +1439,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1417,7 +1448,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1426,7 +1457,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1492,11 +1523,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 12, "id": 12,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1516,7 +1550,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -1568,11 +1602,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 13, "id": 13,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1592,7 +1629,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -1656,11 +1693,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 14, "id": 14,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1680,7 +1720,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -1732,11 +1772,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 15, "id": 15,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1756,7 +1799,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -1820,11 +1863,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 16, "id": 16,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1844,7 +1890,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -1896,11 +1942,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 17, "id": 17,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1920,7 +1969,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -1984,11 +2033,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 18, "id": 18,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -2008,7 +2060,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -2060,11 +2112,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 19, "id": 19,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -2084,7 +2139,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -2149,11 +2204,14 @@
"decimals": -1, "decimals": -1,
"fill": 10, "fill": 10,
"id": 20, "id": 20,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -2173,7 +2231,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m])))", "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -2225,11 +2283,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 21, "id": 21,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -2249,7 +2310,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{namespace}}", "legendFormat": "{{namespace}}",
@ -2313,11 +2374,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 22, "id": 22,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -2465,7 +2529,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2474,7 +2538,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(namespace) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2483,7 +2547,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2492,7 +2556,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2501,7 +2565,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2510,7 +2574,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,

View File

@ -22,11 +22,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 1, "id": 1,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -46,7 +49,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -98,11 +101,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 2, "id": 2,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -122,7 +128,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -174,11 +180,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -198,7 +207,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -250,11 +259,14 @@
"fill": 1, "fill": 1,
"format": "percentunit", "format": "percentunit",
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -274,7 +286,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -337,11 +349,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 5, "id": 5,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -464,11 +479,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -699,11 +717,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -746,7 +767,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -826,11 +847,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1004,7 +1028,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1022,7 +1046,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1040,7 +1064,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1049,7 +1073,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1058,7 +1082,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1067,7 +1091,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1135,10 +1159,12 @@
"id": 9, "id": 9,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1282,7 +1308,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1291,7 +1317,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1300,7 +1326,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1309,7 +1335,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1318,7 +1344,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1327,7 +1353,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1393,11 +1419,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1469,11 +1498,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 11, "id": 11,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1557,11 +1589,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 12, "id": 12,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1633,11 +1668,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 13, "id": 13,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1721,11 +1759,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 14, "id": 14,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1797,11 +1838,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 15, "id": 15,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1886,11 +1930,14 @@
"decimals": -1, "decimals": -1,
"fill": 10, "fill": 10,
"id": 16, "id": 16,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1910,7 +1957,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m])))", "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1962,11 +2009,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 17, "id": 17,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1986,7 +2036,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -2050,11 +2100,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 18, "id": 18,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -2202,7 +2255,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2211,7 +2264,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2220,7 +2273,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2229,7 +2282,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2238,7 +2291,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2247,7 +2300,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2336,7 +2389,7 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info, cluster)", "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -2359,7 +2412,7 @@
"multi": false, "multi": false,
"name": "namespace", "name": "namespace",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -21,11 +21,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 1, "id": 1,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -38,12 +41,32 @@
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [
{
"alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
"hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
"stack": false
}
],
"spaceLength": 10, "spaceLength": 10,
"span": 12, "span": 12,
"stack": true, "stack": true,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{ {
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series", "format": "time_series",
@ -109,11 +132,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 2, "id": 2,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -344,11 +370,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -361,12 +390,32 @@
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [
{
"alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
"hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
"stack": false
}
],
"spaceLength": 10, "spaceLength": 10,
"span": 12, "span": 12,
"stack": true, "stack": true,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{ {
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"format": "time_series", "format": "time_series",
@ -432,11 +481,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -762,7 +814,7 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info, cluster)", "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -785,7 +837,7 @@
"multi": true, "multi": true,
"name": "node", "name": "node",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)", "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -21,11 +21,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 1, "id": 1,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -72,7 +75,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "requests", "legendFormat": "requests",
@ -80,7 +83,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "limits", "legendFormat": "limits",
@ -144,11 +147,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 2, "id": 2,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": true, "current": true,
"max": true, "max": true,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -168,7 +174,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)", "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{container}}", "legendFormat": "{{container}}",
@ -241,11 +247,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -476,11 +485,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -521,7 +533,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{container}}", "legendFormat": "{{container}}",
@ -529,7 +541,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "requests", "legendFormat": "requests",
@ -537,7 +549,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "limits", "legendFormat": "limits",
@ -601,11 +613,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 5, "id": 5,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -779,7 +794,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -797,7 +812,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -815,7 +830,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -824,7 +839,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -833,7 +848,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -842,7 +857,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -910,10 +925,12 @@
"id": 6, "id": 6,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -933,7 +950,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -987,10 +1004,12 @@
"id": 7, "id": 7,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1010,7 +1029,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1076,10 +1095,12 @@
"id": 8, "id": 8,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1099,7 +1120,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1153,10 +1174,12 @@
"id": 9, "id": 9,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1176,7 +1199,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1242,10 +1265,12 @@
"id": 10, "id": 10,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1265,7 +1290,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1319,10 +1344,12 @@
"id": 11, "id": 11,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1342,7 +1369,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1407,11 +1434,14 @@
"decimals": -1, "decimals": -1,
"fill": 10, "fill": 10,
"id": 12, "id": 12,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1431,7 +1461,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m])))", "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Reads", "legendFormat": "Reads",
@ -1439,7 +1469,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m])))", "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Writes", "legendFormat": "Writes",
@ -1491,11 +1521,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 13, "id": 13,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1515,7 +1548,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Reads", "legendFormat": "Reads",
@ -1523,7 +1556,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m]))", "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Writes", "legendFormat": "Writes",
@ -1588,11 +1621,14 @@
"decimals": -1, "decimals": -1,
"fill": 10, "fill": 10,
"id": 14, "id": 14,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1612,7 +1648,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m])))", "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{container}}", "legendFormat": "{{container}}",
@ -1664,11 +1700,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 15, "id": 15,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1688,7 +1727,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))", "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{container}}", "legendFormat": "{{container}}",
@ -1752,11 +1791,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 16, "id": 16,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1904,7 +1946,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))", "expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1913,7 +1955,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(container) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))", "expr": "sum by(container) (rate(container_fs_writes_total{job=\"cadvisor\",device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1922,7 +1964,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))", "expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1931,7 +1973,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))", "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1940,7 +1982,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))", "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1949,7 +1991,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))", "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -2038,7 +2080,7 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info, cluster)", "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -2061,7 +2103,7 @@
"multi": false, "multi": false,
"name": "namespace", "name": "namespace",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -2084,7 +2126,7 @@
"multi": false, "multi": false,
"name": "pod", "name": "pod",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -21,11 +21,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 1, "id": 1,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -109,11 +112,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 2, "id": 2,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -251,7 +257,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -260,7 +266,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -269,7 +275,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -278,7 +284,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -344,11 +350,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -432,11 +441,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -574,7 +586,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -583,7 +595,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -592,7 +604,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -601,7 +613,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -669,10 +681,12 @@
"id": 5, "id": 5,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -816,7 +830,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -825,7 +839,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -834,7 +848,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -843,7 +857,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -852,7 +866,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -861,7 +875,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -927,11 +941,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -951,7 +968,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1003,11 +1020,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1027,7 +1047,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1091,11 +1111,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1115,7 +1138,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1167,11 +1190,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 9, "id": 9,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1191,7 +1217,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1255,11 +1281,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1279,7 +1308,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1331,11 +1360,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 11, "id": 11,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1355,7 +1387,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1419,11 +1451,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 12, "id": 12,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1443,7 +1478,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1495,11 +1530,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 13, "id": 13,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1519,7 +1557,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1606,7 +1644,7 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info, cluster)", "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -1629,30 +1667,7 @@
"multi": false, "multi": false,
"name": "namespace", "name": "namespace",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "workload",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -1675,7 +1690,30 @@
"multi": false, "multi": false,
"name": "type", "name": "type",
"options": [ ], "options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "workload",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -21,11 +21,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 1, "id": 1,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -148,11 +151,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 2, "id": 2,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -329,7 +335,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -338,7 +344,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -347,7 +353,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -356,7 +362,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -422,11 +428,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -469,7 +478,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}} - {{workload_type}}", "legendFormat": "{{workload}} - {{workload_type}}",
@ -549,11 +558,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -721,7 +733,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -730,7 +742,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -739,7 +751,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -748,7 +760,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -757,7 +769,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -825,10 +837,12 @@
"id": 5, "id": 5,
"interval": "1m", "interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -987,7 +1001,7 @@
], ],
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -996,7 +1010,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1005,7 +1019,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1014,7 +1028,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1023,7 +1037,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1032,7 +1046,7 @@
"step": 10 "step": 10
}, },
{ {
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table", "format": "table",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@ -1098,11 +1112,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1122,7 +1139,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -1174,11 +1191,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1198,7 +1218,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -1262,11 +1282,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1286,7 +1309,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -1338,11 +1361,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 9, "id": 9,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1362,7 +1388,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -1426,11 +1452,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1450,7 +1479,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -1502,11 +1531,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 11, "id": 11,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1526,7 +1558,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -1590,11 +1622,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 12, "id": 12,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1614,7 +1649,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -1666,11 +1701,14 @@
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"id": 13, "id": 13,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": true,
"show": true, "show": true,
"total": false, "total": false,
"values": false "values": false
@ -1690,7 +1728,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{workload}}", "legendFormat": "{{workload}}",
@ -1777,7 +1815,30 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info, cluster)", "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -1814,29 +1875,6 @@
"tagsQuery": "", "tagsQuery": "",
"type": "query", "type": "query",
"useTags": false "useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
} }
] ]
}, },

View File

@ -154,7 +154,7 @@
"refId": "A" "refId": "A"
} }
], ],
"title": "Running Container", "title": "Running Containers",
"transparent": false, "transparent": false,
"type": "stat" "type": "stat"
}, },
@ -294,7 +294,7 @@
"pluginVersion": "7", "pluginVersion": "7",
"targets": [ "targets": [
{ {
"expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))", "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -347,7 +347,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}", "legendFormat": "{{instance}} {{operation_type}}",
@ -432,7 +432,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}", "legendFormat": "{{instance}} {{operation_type}}",
@ -517,7 +517,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}", "legendFormat": "{{instance}} {{operation_type}}",
@ -602,14 +602,14 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} pod", "legendFormat": "{{instance}} pod",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} worker", "legendFormat": "{{instance}} worker",
@ -694,14 +694,14 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} pod", "legendFormat": "{{instance}} pod",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} worker", "legendFormat": "{{instance}} worker",
@ -788,7 +788,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -875,7 +875,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -962,7 +962,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -1047,7 +1047,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{operation_type}}", "legendFormat": "{{operation_type}}",
@ -1132,7 +1132,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}", "legendFormat": "{{instance}} {{operation_type}}",
@ -1218,7 +1218,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -1303,7 +1303,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -1388,7 +1388,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -1473,28 +1473,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "2xx", "legendFormat": "2xx",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "3xx", "legendFormat": "3xx",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "4xx", "legendFormat": "4xx",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "5xx", "legendFormat": "5xx",
@ -1579,7 +1579,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}} {{verb}} {{url}}", "legendFormat": "{{instance}} {{verb}} {{url}}",
@ -1749,7 +1749,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])", "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -1893,7 +1893,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",
@ -1927,11 +1927,11 @@
"datasource": "$datasource", "datasource": "$datasource",
"hide": 0, "hide": 0,
"includeAll": true, "includeAll": true,
"label": null, "label": "instance",
"multi": false, "multi": false,
"name": "instance", "name": "instance",
"options": [ ], "options": [ ],
"query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\"}, instance)", "query": "label_values(up{job=\"kubelet\",cluster=\"$cluster\"}, instance)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -1137,7 +1137,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",

View File

@ -1349,7 +1349,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",

View File

@ -26,13 +26,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 2, "id": 2,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": true, "avg": true,
"current": true, "current": true,
"max": true, "max": true,
"min": true, "min": true,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -124,7 +125,11 @@
}, },
"gridPos": { }, "gridPos": { },
"id": 3, "id": 3,
"interval": null, "interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ], "links": [ ],
"mappingType": 1, "mappingType": 1,
"mappingTypes": [ "mappingTypes": [
@ -207,13 +212,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": true, "avg": true,
"current": true, "current": true,
"max": true, "max": true,
"min": true, "min": true,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -305,7 +311,11 @@
}, },
"gridPos": { }, "gridPos": { },
"id": 5, "id": 5,
"interval": null, "interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ], "links": [ ],
"mappingType": 1, "mappingType": 1,
"mappingTypes": [ "mappingTypes": [
@ -388,7 +398,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",
@ -406,7 +416,7 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}, cluster)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -903,7 +903,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",

View File

@ -36,7 +36,11 @@
}, },
"gridPos": { }, "gridPos": { },
"id": 2, "id": 2,
"interval": null, "interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ], "links": [ ],
"mappingType": 1, "mappingType": 1,
"mappingTypes": [ "mappingTypes": [
@ -106,13 +110,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -134,7 +139,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "rate", "legendFormat": "rate",
@ -187,6 +192,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -215,7 +221,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -281,13 +287,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 5, "id": 5,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -309,7 +316,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "rate", "legendFormat": "rate",
@ -362,6 +369,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -390,7 +398,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))", "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -456,13 +464,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -484,28 +493,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "2xx", "legendFormat": "2xx",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "3xx", "legendFormat": "3xx",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "4xx", "legendFormat": "4xx",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "5xx", "legendFormat": "5xx",
@ -558,13 +567,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -586,7 +596,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}", "legendFormat": "{{verb}} {{url}}",
@ -652,6 +662,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 9, "id": 9,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -680,7 +691,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}", "legendFormat": "{{verb}} {{url}}",
@ -746,13 +757,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -827,13 +839,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 11, "id": 11,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -855,7 +868,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[5m])", "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -908,13 +921,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 12, "id": 12,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -1002,7 +1016,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",
@ -1020,7 +1034,7 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info, cluster)", "query": "label_values(up{job=\"kube-proxy\"}, cluster)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@ -1040,7 +1054,7 @@
"multi": false, "multi": false,
"name": "instance", "name": "instance",
"options": [ ], "options": [ ],
"query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\"}, instance)", "query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -36,7 +36,11 @@
}, },
"gridPos": { }, "gridPos": { },
"id": 2, "id": 2,
"interval": null, "interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ], "links": [ ],
"mappingType": 1, "mappingType": 1,
"mappingTypes": [ "mappingTypes": [
@ -106,6 +110,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 3, "id": 3,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -134,28 +139,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)", "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} e2e", "legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)", "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} binding", "legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)", "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} scheduling algorithm", "legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)", "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} volume", "legendFormat": "{{cluster}} {{instance}} volume",
@ -208,6 +213,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 4, "id": 4,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -236,28 +242,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} e2e", "legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} binding", "legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} scheduling algorithm", "legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} volume", "legendFormat": "{{cluster}} {{instance}} volume",
@ -323,13 +329,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 5, "id": 5,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -351,28 +358,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "2xx", "legendFormat": "2xx",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "3xx", "legendFormat": "3xx",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "4xx", "legendFormat": "4xx",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "5xx", "legendFormat": "5xx",
@ -425,13 +432,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 6, "id": 6,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -453,7 +461,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}", "legendFormat": "{{verb}} {{url}}",
@ -519,6 +527,7 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 7, "id": 7,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": false, "avg": false,
@ -547,7 +556,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}", "legendFormat": "{{verb}} {{url}}",
@ -613,13 +622,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 8, "id": 8,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -694,13 +704,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 9, "id": 9,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -722,7 +733,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])", "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
@ -775,13 +786,14 @@
"fillGradient": 0, "fillGradient": 0,
"gridPos": { }, "gridPos": { },
"id": 10, "id": 10,
"interval": "1m",
"legend": { "legend": {
"alignAsTable": false, "alignAsTable": true,
"avg": false, "avg": false,
"current": false, "current": false,
"max": false, "max": false,
"min": false, "min": false,
"rightSide": false, "rightSide": true,
"show": true, "show": true,
"sideWidth": null, "sideWidth": null,
"total": false, "total": false,
@ -869,7 +881,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",
@ -907,7 +919,7 @@
"multi": false, "multi": false,
"name": "instance", "name": "instance",
"options": [ ], "options": [ ],
"query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\"}, instance)", "query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -89,7 +89,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{ pod }}", "legendFormat": "{{ pod }}",
@ -184,7 +184,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{ pod }}", "legendFormat": "{{ pod }}",
@ -290,7 +290,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{ pod }}", "legendFormat": "{{ pod }}",
@ -385,7 +385,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{ pod }}", "legendFormat": "{{ pod }}",
@ -506,7 +506,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -597,7 +597,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -699,7 +699,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -790,7 +790,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -901,7 +901,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -992,7 +992,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{pod}}", "legendFormat": "{{pod}}",
@ -1061,7 +1061,7 @@
"value": "default" "value": "default"
}, },
"hide": 0, "hide": 0,
"label": null, "label": "Data Source",
"name": "datasource", "name": "datasource",
"options": [ ], "options": [ ],
"query": "prometheus", "query": "prometheus",
@ -1079,7 +1079,7 @@
"multi": false, "multi": false,
"name": "cluster", "name": "cluster",
"options": [ ], "options": [ ],
"query": "label_values(kube_pod_info, cluster)", "query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 0, "sort": 0,
@ -1099,14 +1099,14 @@
"value": "kube-system" "value": "kube-system"
}, },
"datasource": "$datasource", "datasource": "$datasource",
"definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "definition": "label_values(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\"}, namespace)",
"hide": 0, "hide": 0,
"includeAll": true, "includeAll": true,
"label": null, "label": null,
"multi": false, "multi": false,
"name": "namespace", "name": "namespace",
"options": [ ], "options": [ ],
"query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "query": "label_values(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\"}, namespace)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"skipUrlSync": false, "skipUrlSync": false,

View File

@ -8,7 +8,7 @@
"subdir": "jsonnet/kube-prometheus" "subdir": "jsonnet/kube-prometheus"
} }
}, },
"version": "release-0.9" "version": "release-0.11"
} }
], ],
"legacyImports": true "legacyImports": true

View File

@ -8,8 +8,8 @@
"subdir": "grafana" "subdir": "grafana"
} }
}, },
"version": "90f38916f1f8a310a715d18e36f787f84df4ddf5", "version": "d039275e4916aceae1c137120882e01d857787ac",
"sum": "0kZ1pnuIirDtbg6F9at5+NQOwKNONIGEPq0eECzvRkI=" "sum": "515vMn4x4tP8vegL4HLW0nDO5+njGTgnDZB5OOhtsCI="
}, },
{ {
"source": { "source": {
@ -18,9 +18,19 @@
"subdir": "contrib/mixin" "subdir": "contrib/mixin"
} }
}, },
"version": "74aa38ec10bc22d34ffd204f46df6e460b78d855", "version": "19002cfc689fba2b8f56605e5797bf79f8b61fdd",
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc=" "sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
}, },
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafana.git",
"subdir": "grafana-mixin"
}
},
"version": "3eed09056849ab873b867b561b7ce580ef2c75ba",
"sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
},
{ {
"source": { "source": {
"git": { "git": {
@ -38,7 +48,7 @@
"subdir": "grafana-builder" "subdir": "grafana-builder"
} }
}, },
"version": "c132c4afcf17491718539db4c2d94c0ea4346120", "version": "dbf6fc14105c28b6fd0253005f7ca2da37d3d4e1",
"sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0=" "sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
}, },
{ {
@ -48,8 +58,8 @@
"subdir": "" "subdir": ""
} }
}, },
"version": "fb9d8ed4bc4a3d6efac525f72e8a0d2c583a0fe2", "version": "b8f44bb7be728423836bef0e904ec7166895a34b",
"sum": "xjKkdp+5fkekCNBUIgZCHTRmVdUEmQNFKslrL2Ho8gs=" "sum": "LCgSosxceeYuoau5fYSPtE5eXOFe46DxexfkrctUv7c="
}, },
{ {
"source": { "source": {
@ -58,7 +68,7 @@
"subdir": "lib/promgrafonnet" "subdir": "lib/promgrafonnet"
} }
}, },
"version": "eb98d4f74e8ac9c30b1f0e815b07bed31da76c8f", "version": "5e44626d70c2bf2d35c37f3fee5a6261a5335cc6",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps=" "sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
}, },
{ {
@ -68,8 +78,8 @@
"subdir": "jsonnet/kube-state-metrics" "subdir": "jsonnet/kube-state-metrics"
} }
}, },
"version": "f170cc73f11c1580d7f38af746be0f2fa79c6a1e", "version": "0567e1e1b981755e563d2244fa1659563f2cddbc",
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE=" "sum": "P0dCnbzyPScQGNXwXRcwiPkMLeTq0IPNbSTysDbySnM="
}, },
{ {
"source": { "source": {
@ -78,7 +88,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin" "subdir": "jsonnet/kube-state-metrics-mixin"
} }
}, },
"version": "f170cc73f11c1580d7f38af746be0f2fa79c6a1e", "version": "0567e1e1b981755e563d2244fa1659563f2cddbc",
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk=" "sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
}, },
{ {
@ -88,8 +98,8 @@
"subdir": "jsonnet/kube-prometheus" "subdir": "jsonnet/kube-prometheus"
} }
}, },
"version": "452aaed72e36acb31cae93cfa85a5d9c3d3d2ec7", "version": "e3066575dc8be21f578f12887563bda3ee7a2eff",
"sum": "pupXEvlRbhLdEO9b8LfFZB66+Z7fEqvRZ9m3MyEvsv4=" "sum": "nNEMDrb5sQDOxJ20ITDvldyfIbbiGcVr8Bq46PH2ww8="
}, },
{ {
"source": { "source": {
@ -98,8 +108,8 @@
"subdir": "jsonnet/mixin" "subdir": "jsonnet/mixin"
} }
}, },
"version": "83fe36566f4e0894eb5ffcd2638a0f039a17bdeb", "version": "5db6996d3ca995e66301c53c33959fd64c3f6ae6",
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=", "sum": "GQmaVFJwKMiD/P4n3N2LrAZVcwutriWrP8joclDtBYQ=",
"name": "prometheus-operator-mixin" "name": "prometheus-operator-mixin"
}, },
{ {
@ -109,8 +119,8 @@
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
"version": "83fe36566f4e0894eb5ffcd2638a0f039a17bdeb", "version": "5db6996d3ca995e66301c53c33959fd64c3f6ae6",
"sum": "J1G++A8hrtr3+OZQMmcNeb1w/C30bXqqwpwHL/Xhsd4=" "sum": "pUggCYwO/3Y/p6Vgryx8Y4KO3QkJ+GqimrZtn/luzzI="
}, },
{ {
"source": { "source": {
@ -119,8 +129,8 @@
"subdir": "doc/alertmanager-mixin" "subdir": "doc/alertmanager-mixin"
} }
}, },
"version": "b408b522bc653d014e53035e59fa394cc1edd762", "version": "14b01e6a34dd3155768c7e9bd5c4376055de9419",
"sum": "pep+dHzfIjh2SU5pEkwilMCAT/NoL6YYflV4x8cr7vU=", "sum": "f3iZDUXQ/YWB5yDCY7VLD5bs442+3CdJgXJhJyWhNf8=",
"name": "alertmanager" "name": "alertmanager"
}, },
{ {
@ -130,8 +140,8 @@
"subdir": "docs/node-mixin" "subdir": "docs/node-mixin"
} }
}, },
"version": "832909dd257eb368cf83363ffcae3ab84cb4bcb1", "version": "a2321e7b940ddcff26873612bccdf7cd4c42b6b6",
"sum": "MmxGhE2PJ1a52mk2x7vDpMT2at4Jglbud/rK74CB5i0=" "sum": "MlWDAKGZ+JArozRKdKEvewHeWn8j2DNBzesJfLVd0dk="
}, },
{ {
"source": { "source": {
@ -140,10 +150,20 @@
"subdir": "documentation/prometheus-mixin" "subdir": "documentation/prometheus-mixin"
} }
}, },
"version": "751ca03faddc9c64089c41d0da370a3a0b477742", "version": "d7e7b8e04b5ecdc1dd153534ba376a622b72741b",
"sum": "AS8WYFi/z10BZSF6DFkKBscjB32XDMM7iIso7CO/FyI=", "sum": "APXOIP3B3dZ3Tyh7L2UhyWR8Vbf5+9adTLz/ya7n6uU=",
"name": "prometheus" "name": "prometheus"
}, },
{
"source": {
"git": {
"remote": "https://github.com/pyrra-dev/pyrra.git",
"subdir": "config/crd/bases"
}
},
"version": "3738a607a42a0c9566587a49cec7587cc92d61bd",
"sum": "GQ0GFKGdIWKx1b78VRs6jtC4SMqkBjT5jl65QUjPKK4="
},
{ {
"source": { "source": {
"git": { "git": {
@ -151,8 +171,8 @@
"subdir": "mixin" "subdir": "mixin"
} }
}, },
"version": "ff363498fc95cfe17de894d7237bcf38bdd0bc36", "version": "17c576472d80972bfd3705e1e0a08e6f8da8e04b",
"sum": "cajthvLKDjYgYHCKQU2g/pTMRkxcbuJEvTnCyJOihl8=", "sum": "dBm9ML50quhu6dwTIgfNmVruMqfaUeQVCO/6EKtQLxE=",
"name": "thanos-mixin" "name": "thanos-mixin"
} }
], ],

View File

@ -6,10 +6,10 @@ dashboards:
url: https://grafana.com/api/dashboards/12539/revisions/5/download url: https://grafana.com/api/dashboards/12539/revisions/5/download
tags: ['kubernetes', 'DNS'] tags: ['kubernetes', 'DNS']
- name: etcd - name: etcd
url: https://grafana.com/api/dashboards/3070/revisions/3/download url: https://grafana.com/api/dashboards/15308/revisions/1/download
tags: ['kubernetes', 'etcd'] tags: ['kubernetes', 'etcd']
- name: node - name: node
url: https://grafana.com/api/dashboards/1860/revisions/23/download url: https://grafana.com/api/dashboards/1860/revisions/27/download
tags: ['kubernetes'] tags: ['kubernetes']
# cd dashboards; for f in *.json; do echo "- name: ${f%%.json}" >> ../dashboards.yaml; echo " url: file://dashboards/$f" >> ../dashboards.yaml; done; cd - # cd dashboards; for f in *.json; do echo "- name: ${f%%.json}" >> ../dashboards.yaml; echo " url: file://dashboards/$f" >> ../dashboards.yaml; done; cd -
- name: apiserver - name: apiserver

View File

@ -4,9 +4,10 @@
"metadata": { "metadata": {
"labels": { "labels": {
"app.kubernetes.io/component": "alert-router", "app.kubernetes.io/component": "alert-router",
"app.kubernetes.io/instance": "main",
"app.kubernetes.io/name": "alertmanager", "app.kubernetes.io/name": "alertmanager",
"app.kubernetes.io/part-of": "kube-prometheus", "app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.22.2", "app.kubernetes.io/version": "0.24.0",
"prometheus": "k8s", "prometheus": "k8s",
"role": "alert-rules" "role": "alert-rules"
}, },

View File

@ -41,6 +41,18 @@
"labels": { "labels": {
"severity": "none" "severity": "none"
} }
},
{
"alert": "InfoInhibitor",
"annotations": {
"description": "This is an alert that is used to inhibit info alerts.\nBy themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with\nother alerts.\nThis alert fires whenever there's a severity=\"info\" alert, and stops firing when another alert with a\nseverity of 'warning' or 'critical' starts firing on the same namespace.\nThis alert should be routed to a null receiver and configured to inhibit alerts with severity=\"info\".\n",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor",
"summary": "Info-level alert inhibition."
},
"expr": "ALERTS{severity = \"info\"} == 1 unless on(namespace) ALERTS{alertname != \"InfoInhibitor\", severity =~ \"warning|critical\", alertstate=\"firing\"} == 1",
"labels": {
"severity": "none"
}
} }
] ]
}, },
@ -86,7 +98,7 @@
"record": "cluster:node_cpu:sum_rate5m" "record": "cluster:node_cpu:sum_rate5m"
}, },
{ {
"expr": "cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))", "expr": "cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))",
"record": "cluster:node_cpu:ratio" "record": "cluster:node_cpu:ratio"
} }
] ]

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter", "app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "kube-state-metrics", "app.kubernetes.io/name": "kube-state-metrics",
"app.kubernetes.io/part-of": "kube-prometheus", "app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.1.1", "app.kubernetes.io/version": "2.5.0",
"prometheus": "k8s", "prometheus": "k8s",
"role": "alert-rules" "role": "alert-rules"
}, },

View File

@ -36,7 +36,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready",
"summary": "Pod has been in a non-ready state for more than 15 minutes." "summary": "Pod has been in a non-ready state for more than 15 minutes."
}, },
"expr": "sum by (namespace, pod) (\n max by(namespace, pod) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (\n 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n", "expr": "sum by (namespace, pod, cluster) (\n max by(namespace, pod, cluster) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (\n 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n",
"for": "15m", "for": "15m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -114,7 +114,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck",
"summary": "DaemonSet rollout is stuck." "summary": "DaemonSet rollout is stuck."
}, },
"expr": "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n", "expr": "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
"for": "15m", "for": "15m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -123,11 +123,11 @@
{ {
"alert": "KubeContainerWaiting", "alert": "KubeContainerWaiting",
"annotations": { "annotations": {
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.", "description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting",
"summary": "Pod container waiting longer than 1 hour" "summary": "Pod container waiting longer than 1 hour"
}, },
"expr": "sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\"}) > 0\n", "expr": "sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\"}) > 0\n",
"for": "1h", "for": "1h",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -160,14 +160,13 @@
} }
}, },
{ {
"alert": "KubeJobCompletion", "alert": "KubeJobNotCompleted",
"annotations": { "annotations": {
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete.", "description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobcompletion", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted",
"summary": "Job did not complete in time" "summary": "Job did not complete in time"
}, },
"expr": "kube_job_spec_completions{job=\"kube-state-metrics\"} - kube_job_status_succeeded{job=\"kube-state-metrics\"} > 0\n", "expr": "time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\"}\n and\nkube_job_status_active{job=\"kube-state-metrics\"} > 0) > 43200\n",
"for": "12h",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
} }
@ -232,7 +231,7 @@
{ {
"alert": "KubeMemoryOvercommit", "alert": "KubeMemoryOvercommit",
"annotations": { "annotations": {
"description": "Cluster has overcommitted memory resource requests for Pods by {{ $value }} bytes and cannot tolerate node failure.", "description": "Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit",
"summary": "Cluster has overcommitted memory resource requests." "summary": "Cluster has overcommitted memory resource requests."
}, },
@ -249,7 +248,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit",
"summary": "Cluster has overcommitted CPU resource requests." "summary": "Cluster has overcommitted CPU resource requests."
}, },
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n", "expr": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"})\n > 1.5\n",
"for": "5m", "for": "5m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -262,7 +261,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit",
"summary": "Cluster has overcommitted memory resource requests." "summary": "Cluster has overcommitted memory resource requests."
}, },
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n", "expr": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"})\n > 1.5\n",
"for": "5m", "for": "5m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -332,7 +331,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup",
"summary": "PersistentVolume is filling up." "summary": "PersistentVolume is filling up."
}, },
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\n", "expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1m", "for": "1m",
"labels": { "labels": {
"severity": "critical" "severity": "critical"
@ -345,7 +344,33 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup",
"summary": "PersistentVolume is filling up." "summary": "PersistentVolume is filling up."
}, },
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\n", "expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1h",
"labels": {
"severity": "warning"
}
},
{
"alert": "KubePersistentVolumeInodesFillingUp",
"annotations": {
"description": "The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup",
"summary": "PersistentVolumeInodes are filling up."
},
"expr": "(\n kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_inodes_used{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1m",
"labels": {
"severity": "critical"
}
},
{
"alert": "KubePersistentVolumeInodesFillingUp",
"annotations": {
"description": "Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup",
"summary": "PersistentVolumeInodes are filling up."
},
"expr": "(\n kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_inodes_used{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1h", "for": "1h",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -376,7 +401,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch",
"summary": "Different semantic versions of Kubernetes components running." "summary": "Different semantic versions of Kubernetes components running."
}, },
"expr": "count(count by (git_version) (label_replace(kubernetes_build_info{job!~\"kube-dns|coredns\"},\"git_version\",\"$1\",\"git_version\",\"(v[0-9]*.[0-9]*).*\"))) > 1\n", "expr": "count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~\"kube-dns|coredns\"},\"git_version\",\"$1\",\"git_version\",\"(v[0-9]*.[0-9]*).*\"))) > 1\n",
"for": "15m", "for": "15m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -389,7 +414,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors",
"summary": "Kubernetes API server client is experiencing errors." "summary": "Kubernetes API server client is experiencing errors."
}, },
"expr": "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (instance, job, namespace))\n> 0.01\n", "expr": "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (cluster, instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (cluster, instance, job, namespace))\n> 0.01\n",
"for": "15m", "for": "15m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -468,7 +493,7 @@
{ {
"alert": "KubeClientCertificateExpiration", "alert": "KubeClientCertificateExpiration",
"annotations": { "annotations": {
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.", "description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration",
"summary": "Client certificate is about to expire." "summary": "Client certificate is about to expire."
}, },
@ -480,7 +505,7 @@
{ {
"alert": "KubeClientCertificateExpiration", "alert": "KubeClientCertificateExpiration",
"annotations": { "annotations": {
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.", "description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration",
"summary": "Client certificate is about to expire." "summary": "Client certificate is about to expire."
}, },
@ -490,25 +515,25 @@
} }
}, },
{ {
"alert": "AggregatedAPIErrors", "alert": "KubeAggregatedAPIErrors",
"annotations": { "annotations": {
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.", "description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors",
"summary": "An aggregated API has reported errors." "summary": "Kubernetes aggregated API has reported errors."
}, },
"expr": "sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4\n", "expr": "sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total[10m])) > 4\n",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
} }
}, },
{ {
"alert": "AggregatedAPIDown", "alert": "KubeAggregatedAPIDown",
"annotations": { "annotations": {
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.", "description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown",
"summary": "An aggregated API is down." "summary": "Kubernetes aggregated API is down."
}, },
"expr": "(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85\n", "expr": "(1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85\n",
"for": "5m", "for": "5m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -530,9 +555,9 @@
{ {
"alert": "KubeAPITerminatedRequests", "alert": "KubeAPITerminatedRequests",
"annotations": { "annotations": {
"description": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.", "description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests",
"summary": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests." "summary": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
}, },
"expr": "sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) / ( sum(rate(apiserver_request_total{job=\"apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) ) > 0.20\n", "expr": "sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) / ( sum(rate(apiserver_request_total{job=\"apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) ) > 0.20\n",
"for": "5m", "for": "5m",
@ -578,10 +603,10 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods",
"summary": "Kubelet is running at capacity." "summary": "Kubelet is running at capacity."
}, },
"expr": "count by(node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n", "expr": "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n",
"for": "15m", "for": "15m",
"labels": { "labels": {
"severity": "warning" "severity": "info"
} }
}, },
{ {
@ -591,7 +616,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping",
"summary": "Node readiness status is flapping." "summary": "Node readiness status is flapping."
}, },
"expr": "sum(changes(kube_node_status_condition{status=\"true\",condition=\"Ready\"}[15m])) by (node) > 2\n", "expr": "sum(changes(kube_node_status_condition{status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2\n",
"for": "15m", "for": "15m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -617,7 +642,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh",
"summary": "Kubelet Pod startup latency is too high." "summary": "Kubelet Pod startup latency is too high."
}, },
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\", metrics_path=\"/metrics\"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"} > 60\n", "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\", metrics_path=\"/metrics\"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"} > 60\n",
"for": "15m", "for": "15m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -752,98 +777,98 @@
"name": "kube-apiserver-burnrate.rules", "name": "kube-apiserver-burnrate.rules",
"rules": [ "rules": [
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"labels": { "labels": {
"verb": "read" "verb": "read"
}, },
"record": "apiserver_request:burnrate1d" "record": "apiserver_request:burnrate1d"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"labels": { "labels": {
"verb": "read" "verb": "read"
}, },
"record": "apiserver_request:burnrate1h" "record": "apiserver_request:burnrate1h"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"labels": { "labels": {
"verb": "read" "verb": "read"
}, },
"record": "apiserver_request:burnrate2h" "record": "apiserver_request:burnrate2h"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"labels": { "labels": {
"verb": "read" "verb": "read"
}, },
"record": "apiserver_request:burnrate30m" "record": "apiserver_request:burnrate30m"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"labels": { "labels": {
"verb": "read" "verb": "read"
}, },
"record": "apiserver_request:burnrate3d" "record": "apiserver_request:burnrate3d"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"labels": { "labels": {
"verb": "read" "verb": "read"
}, },
"record": "apiserver_request:burnrate5m" "record": "apiserver_request:burnrate5m"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"labels": { "labels": {
"verb": "read" "verb": "read"
}, },
"record": "apiserver_request:burnrate6h" "record": "apiserver_request:burnrate6h"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
"labels": { "labels": {
"verb": "write" "verb": "write"
}, },
"record": "apiserver_request:burnrate1d" "record": "apiserver_request:burnrate1d"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
"labels": { "labels": {
"verb": "write" "verb": "write"
}, },
"record": "apiserver_request:burnrate1h" "record": "apiserver_request:burnrate1h"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
"labels": { "labels": {
"verb": "write" "verb": "write"
}, },
"record": "apiserver_request:burnrate2h" "record": "apiserver_request:burnrate2h"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
"labels": { "labels": {
"verb": "write" "verb": "write"
}, },
"record": "apiserver_request:burnrate30m" "record": "apiserver_request:burnrate30m"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
"labels": { "labels": {
"verb": "write" "verb": "write"
}, },
"record": "apiserver_request:burnrate3d" "record": "apiserver_request:burnrate3d"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
"labels": { "labels": {
"verb": "write" "verb": "write"
}, },
"record": "apiserver_request:burnrate5m" "record": "apiserver_request:burnrate5m"
}, },
{ {
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n", "expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
"labels": { "labels": {
"verb": "write" "verb": "write"
}, },
@ -855,41 +880,20 @@
"name": "kube-apiserver-histogram.rules", "name": "kube-apiserver-histogram.rules",
"rules": [ "rules": [
{ {
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))) > 0\n", "expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))) > 0\n",
"labels": { "labels": {
"quantile": "0.99", "quantile": "0.99",
"verb": "read" "verb": "read"
}, },
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile" "record": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile"
}, },
{ {
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))) > 0\n", "expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))) > 0\n",
"labels": { "labels": {
"quantile": "0.99", "quantile": "0.99",
"verb": "write" "verb": "write"
}, },
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile" "record": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
"labels": {
"quantile": "0.99"
},
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
"labels": {
"quantile": "0.9"
},
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
"labels": {
"quantile": "0.5"
},
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
} }
] ]
}, },
@ -916,21 +920,37 @@
"record": "code:apiserver_request_total:increase30d" "record": "code:apiserver_request_total:increase30d"
}, },
{ {
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n ) +\n (\n # read too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"LIST|GET\"}[30d]))\n -\n (\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n", "expr": "sum by (cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count[1h]))\n",
"record": "cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h"
},
{
"expr": "sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d"
},
{
"expr": "sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h]))\n",
"record": "cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h"
},
{
"expr": "sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d"
},
{
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"labels": { "labels": {
"verb": "all" "verb": "all"
}, },
"record": "apiserver_request:availability30d" "record": "apiserver_request:availability30d"
}, },
{ {
"expr": "1 - (\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30d]))\n -\n (\n # too slow\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n", "expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"labels": { "labels": {
"verb": "read" "verb": "read"
}, },
"record": "apiserver_request:availability30d" "record": "apiserver_request:availability30d"
}, },
{ {
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n", "expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"labels": { "labels": {
"verb": "write" "verb": "write"
}, },
@ -992,7 +1012,7 @@
"record": "node_namespace_pod_container:container_memory_swap" "record": "node_namespace_pod_container:container_memory_swap"
}, },
{ {
"expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n", "expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests" "record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests"
}, },
{ {
@ -1000,7 +1020,7 @@
"record": "namespace_memory:kube_pod_container_resource_requests:sum" "record": "namespace_memory:kube_pod_container_resource_requests:sum"
}, },
{ {
"expr": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n", "expr": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests" "record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests"
}, },
{ {
@ -1008,7 +1028,7 @@
"record": "namespace_cpu:kube_pod_container_resource_requests:sum" "record": "namespace_cpu:kube_pod_container_resource_requests:sum"
}, },
{ {
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n", "expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits" "record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits"
}, },
{ {
@ -1016,7 +1036,7 @@
"record": "namespace_memory:kube_pod_container_resource_limits:sum" "record": "namespace_memory:kube_pod_container_resource_limits:sum"
}, },
{ {
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n", "expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n",
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits" "record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits"
}, },
{ {
@ -1043,6 +1063,13 @@
"workload_type": "statefulset" "workload_type": "statefulset"
}, },
"record": "namespace_workload_pod:kube_pod_owner:relabel" "record": "namespace_workload_pod:kube_pod_owner:relabel"
},
{
"expr": "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"Job\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n",
"labels": {
"workload_type": "job"
},
"record": "namespace_workload_pod:kube_pod_owner:relabel"
} }
] ]
}, },
@ -1118,7 +1145,7 @@
"name": "node.rules", "name": "node.rules",
"rules": [ "rules": [
{ {
"expr": "topk by(namespace, pod) (1,\n max by (node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n", "expr": "topk by(cluster, namespace, pod) (1,\n max by (cluster, node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n",
"record": "node_namespace_pod:kube_pod_info:" "record": "node_namespace_pod:kube_pod_info:"
}, },
{ {
@ -1128,6 +1155,10 @@
{ {
"expr": "sum(\n node_memory_MemAvailable_bytes{job=\"node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"} +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n) by (cluster)\n", "expr": "sum(\n node_memory_MemAvailable_bytes{job=\"node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"} +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n) by (cluster)\n",
"record": ":node_memory_MemAvailable_bytes:sum" "record": ":node_memory_MemAvailable_bytes:sum"
},
{
"expr": "sum(rate(node_cpu_seconds_total{job=\"node-exporter\",mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m])) /\ncount(sum(node_cpu_seconds_total{job=\"node-exporter\"}) by (cluster, instance, cpu))\n",
"record": "cluster:node_cpu:ratio_rate5m"
} }
] ]
}, },
@ -1135,21 +1166,21 @@
"name": "kubelet.rules", "name": "kubelet.rules",
"rules": [ "rules": [
{ {
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n", "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"labels": { "labels": {
"quantile": "0.99" "quantile": "0.99"
}, },
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile" "record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
}, },
{ {
"expr": "histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n", "expr": "histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"labels": { "labels": {
"quantile": "0.9" "quantile": "0.9"
}, },
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile" "record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
}, },
{ {
"expr": "histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n", "expr": "histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"labels": { "labels": {
"quantile": "0.5" "quantile": "0.5"
}, },

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter", "app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "node-exporter", "app.kubernetes.io/name": "node-exporter",
"app.kubernetes.io/part-of": "kube-prometheus", "app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "1.2.2", "app.kubernetes.io/version": "1.3.1",
"prometheus": "k8s", "prometheus": "k8s",
"role": "alert-rules" "role": "alert-rules"
}, },
@ -25,7 +25,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup",
"summary": "Filesystem is predicted to run out of space within the next 24 hours." "summary": "Filesystem is predicted to run out of space within the next 24 hours."
}, },
"expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 40\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n", "expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n",
"for": "1h", "for": "1h",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -38,7 +38,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup",
"summary": "Filesystem is predicted to run out of space within the next 4 hours." "summary": "Filesystem is predicted to run out of space within the next 4 hours."
}, },
"expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n", "expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 10\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n",
"for": "1h", "for": "1h",
"labels": { "labels": {
"severity": "critical" "severity": "critical"
@ -255,11 +255,11 @@
"name": "node-exporter.rules", "name": "node-exporter.rules",
"rules": [ "rules": [
{ {
"expr": "count without (cpu) (\n count without (mode) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n )\n)\n", "expr": "count without (cpu, mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}\n)\n",
"record": "instance:node_num_cpu:sum" "record": "instance:node_num_cpu:sum"
}, },
{ {
"expr": "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[5m])\n)\n", "expr": "1 - avg without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n)\n",
"record": "instance:node_cpu_utilisation:rate5m" "record": "instance:node_cpu_utilisation:rate5m"
}, },
{ {
@ -267,7 +267,7 @@
"record": "instance:node_load1_per_cpu:ratio" "record": "instance:node_load1_per_cpu:ratio"
}, },
{ {
"expr": "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n", "expr": "1 - (\n (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
"record": "instance:node_memory_utilisation:ratio" "record": "instance:node_memory_utilisation:ratio"
}, },
{ {

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "controller", "app.kubernetes.io/component": "controller",
"app.kubernetes.io/name": "prometheus-operator", "app.kubernetes.io/name": "prometheus-operator",
"app.kubernetes.io/part-of": "kube-prometheus", "app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.49.0", "app.kubernetes.io/version": "0.57.0",
"prometheus": "k8s", "prometheus": "k8s",
"role": "alert-rules" "role": "alert-rules"
}, },
@ -38,7 +38,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors",
"summary": "Errors while performing watch operations in controller." "summary": "Errors while performing watch operations in controller."
}, },
"expr": "(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job=\"prometheus-operator\",namespace=\"monitoring\"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job=\"prometheus-operator\",namespace=\"monitoring\"}[10m]))) > 0.4\n", "expr": "(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job=\"prometheus-operator\",namespace=\"monitoring\"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job=\"prometheus-operator\",namespace=\"monitoring\"}[5m]))) > 0.4\n",
"for": "15m", "for": "15m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -90,7 +90,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready", "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready",
"summary": "Prometheus operator not ready" "summary": "Prometheus operator not ready"
}, },
"expr": "min by(namespace, controller) (max_over_time(prometheus_operator_ready{job=\"prometheus-operator\",namespace=\"monitoring\"}[5m]) == 0)\n", "expr": "min by (controller,namespace) (max_over_time(prometheus_operator_ready{job=\"prometheus-operator\",namespace=\"monitoring\"}[5m]) == 0)\n",
"for": "5m", "for": "5m",
"labels": { "labels": {
"severity": "warning" "severity": "warning"
@ -110,6 +110,24 @@
} }
} }
] ]
},
{
"name": "config-reloaders",
"rules": [
{
"alert": "ConfigReloaderSidecarErrors",
"annotations": {
"description": "Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.\nAs a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors",
"summary": "config-reloader sidecar has not had a successful reload for 10m"
},
"expr": "max_over_time(reloader_last_reload_successful{namespace=~\".+\"}[5m]) == 0\n",
"for": "10m",
"labels": {
"severity": "warning"
}
}
]
} }
] ]
} }

View File

@ -4,9 +4,10 @@
"metadata": { "metadata": {
"labels": { "labels": {
"app.kubernetes.io/component": "prometheus", "app.kubernetes.io/component": "prometheus",
"app.kubernetes.io/instance": "k8s",
"app.kubernetes.io/name": "prometheus", "app.kubernetes.io/name": "prometheus",
"app.kubernetes.io/part-of": "kube-prometheus", "app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.29.1", "app.kubernetes.io/version": "2.36.1",
"prometheus": "k8s", "prometheus": "k8s",
"role": "alert-rules" "role": "alert-rules"
}, },
@ -226,6 +227,32 @@
"severity": "warning" "severity": "warning"
} }
}, },
{
"alert": "PrometheusScrapeBodySizeLimitHit",
"annotations": {
"description": "Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf \"%.0f\" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit",
"summary": "Prometheus has dropped some targets that exceeded body size limit."
},
"expr": "increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]) > 0\n",
"for": "15m",
"labels": {
"severity": "warning"
}
},
{
"alert": "PrometheusScrapeSampleLimitHit",
"annotations": {
"description": "Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf \"%.0f\" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit",
"summary": "Prometheus has failed scrapes that have exceeded the configured sample limit."
},
"expr": "increase(prometheus_target_scrapes_exceeded_sample_limit_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]) > 0\n",
"for": "15m",
"labels": {
"severity": "warning"
}
},
{ {
"alert": "PrometheusTargetSyncFailure", "alert": "PrometheusTargetSyncFailure",
"annotations": { "annotations": {

File diff suppressed because one or more lines are too long

View File

@ -36,6 +36,26 @@ spec:
expr: vector(1) expr: vector(1)
labels: labels:
severity: none severity: none
- alert: InfoInhibitor
annotations:
description: 'This is an alert that is used to inhibit info alerts.
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
other alerts.
This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a
severity of ''warning'' or ''critical'' starts firing on the same namespace.
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
summary: Info-level alert inhibition.
expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
labels:
severity: none
- name: node-network - name: node-network
rules: rules:
- alert: NodeNetworkInterfaceFlapping - alert: NodeNetworkInterfaceFlapping
@ -61,7 +81,7 @@ spec:
record: instance:node_cpu:ratio record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
record: cluster:node_cpu:sum_rate5m record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
record: cluster:node_cpu:ratio record: cluster:node_cpu:ratio
- name: kube-prometheus-general.rules - name: kube-prometheus-general.rules
rules: rules:

View File

@ -25,7 +25,7 @@ spec:
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes. description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes. summary: Pod has been in a non-ready state for more than 15 minutes.
expr: "sum by (namespace, pod) (\n max by(namespace, pod) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (\n 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n" expr: "sum by (namespace, pod, cluster) (\n max by(namespace, pod, cluster) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (\n 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n"
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -79,16 +79,16 @@ spec:
description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes. description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck. summary: DaemonSet rollout is stuck.
expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n" expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- alert: KubeContainerWaiting - alert: KubeContainerWaiting
annotations: annotations:
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. description: pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour summary: Pod container waiting longer than 1 hour
expr: 'sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0 expr: 'sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
' '
for: 1h for: 1h
@ -114,15 +114,12 @@ spec:
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- alert: KubeJobCompletion - alert: KubeJobNotCompleted
annotations: annotations:
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than 12 hours to complete. description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobcompletion runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
summary: Job did not complete in time summary: Job did not complete in time
expr: 'kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0 expr: "time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\"}\n and\nkube_job_status_active{job=\"kube-state-metrics\"} > 0) > 43200\n"
'
for: 12h
labels: labels:
severity: warning severity: warning
- alert: KubeJobFailed - alert: KubeJobFailed
@ -173,7 +170,7 @@ spec:
severity: warning severity: warning
- alert: KubeMemoryOvercommit - alert: KubeMemoryOvercommit
annotations: annotations:
description: Cluster has overcommitted memory resource requests for Pods by {{`{{`}} $value {{`}}`}} bytes and cannot tolerate node failure. description: Cluster has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
summary: Cluster has overcommitted memory resource requests. summary: Cluster has overcommitted memory resource requests.
expr: 'sum(namespace_memory:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 expr: 'sum(namespace_memory:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
@ -191,7 +188,7 @@ spec:
description: Cluster has overcommitted CPU resource requests for Namespaces. description: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests. summary: Cluster has overcommitted CPU resource requests.
expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n" expr: "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"})\n > 1.5\n"
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
@ -200,7 +197,7 @@ spec:
description: Cluster has overcommitted memory resource requests for Namespaces. description: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests. summary: Cluster has overcommitted memory resource requests.
expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n" expr: "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"})\n > 1.5\n"
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
@ -247,7 +244,7 @@ spec:
description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free. description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
summary: PersistentVolume is filling up. summary: PersistentVolume is filling up.
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\n" expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1m for: 1m
labels: labels:
severity: critical severity: critical
@ -256,7 +253,25 @@ spec:
description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available. description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
summary: PersistentVolume is filling up. summary: PersistentVolume is filling up.
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\n" expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1h
labels:
severity: warning
- alert: KubePersistentVolumeInodesFillingUp
annotations:
description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} only has {{`{{`}} $value | humanizePercentage {{`}}`}} free inodes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
summary: PersistentVolumeInodes are filling up.
expr: "(\n kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_inodes_used{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1m
labels:
severity: critical
- alert: KubePersistentVolumeInodesFillingUp
annotations:
description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to run out of inodes within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} of its inodes are free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
summary: PersistentVolumeInodes are filling up.
expr: "(\n kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_inodes_used{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
@ -278,7 +293,7 @@ spec:
description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running. description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch
summary: Different semantic versions of Kubernetes components running. summary: Different semantic versions of Kubernetes components running.
expr: 'count(count by (git_version) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1 expr: 'count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1
' '
for: 15m for: 15m
@ -289,7 +304,7 @@ spec:
description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.' description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
summary: Kubernetes API server client is experiencing errors. summary: Kubernetes API server client is experiencing errors.
expr: "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (instance, job, namespace))\n> 0.01\n" expr: "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (cluster, instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (cluster, instance, job, namespace))\n> 0.01\n"
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -367,7 +382,7 @@ spec:
rules: rules:
- alert: KubeClientCertificateExpiration - alert: KubeClientCertificateExpiration
annotations: annotations:
description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days. description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
@ -377,7 +392,7 @@ spec:
severity: warning severity: warning
- alert: KubeClientCertificateExpiration - alert: KubeClientCertificateExpiration
annotations: annotations:
description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours. description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
@ -385,22 +400,22 @@ spec:
' '
labels: labels:
severity: critical severity: critical
- alert: AggregatedAPIErrors - alert: KubeAggregatedAPIErrors
annotations: annotations:
description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m. description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors
summary: An aggregated API has reported errors. summary: Kubernetes aggregated API has reported errors.
expr: 'sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4 expr: 'sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total[10m])) > 4
' '
labels: labels:
severity: warning severity: warning
- alert: AggregatedAPIDown - alert: KubeAggregatedAPIDown
annotations: annotations:
description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m. description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown
summary: An aggregated API is down. summary: Kubernetes aggregated API is down.
expr: '(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85 expr: '(1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
' '
for: 5m for: 5m
@ -419,9 +434,9 @@ spec:
severity: critical severity: critical
- alert: KubeAPITerminatedRequests - alert: KubeAPITerminatedRequests
annotations: annotations:
description: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests. description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
summary: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests. summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
expr: 'sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20 expr: 'sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
' '
@ -457,16 +472,16 @@ spec:
description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity. description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods
summary: Kubelet is running at capacity. summary: Kubelet is running at capacity.
expr: "count by(node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n" expr: "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n"
for: 15m for: 15m
labels: labels:
severity: warning severity: info
- alert: KubeNodeReadinessFlapping - alert: KubeNodeReadinessFlapping
annotations: annotations:
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes. description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping
summary: Node readiness status is flapping. summary: Node readiness status is flapping.
expr: 'sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2 expr: 'sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (cluster, node) > 2
' '
for: 15m for: 15m
@ -488,7 +503,7 @@ spec:
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}. description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh
summary: Kubelet Pod startup latency is too high. summary: Kubelet Pod startup latency is too high.
expr: 'histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 expr: 'histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
' '
for: 15m for: 15m
@ -595,96 +610,85 @@ spec:
severity: critical severity: critical
- name: kube-apiserver-burnrate.rules - name: kube-apiserver-burnrate.rules
rules: rules:
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate1d record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate1h record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate2h record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"\
,code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate30m record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate3d record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate5m record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate6h record: apiserver_request:burnrate6h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate1d record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate1h record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate2h record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate30m record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate3d record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate5m record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n" - expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate6h record: apiserver_request:burnrate6h
- name: kube-apiserver-histogram.rules - name: kube-apiserver-histogram.rules
rules: rules:
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0 - expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0
' '
labels: labels:
quantile: '0.99' quantile: '0.99'
verb: read verb: read
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 - expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0
' '
labels: labels:
quantile: '0.99' quantile: '0.99'
verb: write verb: write
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
'
labels:
quantile: '0.99'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
'
labels:
quantile: '0.9'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
'
labels:
quantile: '0.5'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- interval: 3m - interval: 3m
name: kube-apiserver-availability.rules name: kube-apiserver-availability.rules
rules: rules:
@ -704,16 +708,32 @@ spec:
labels: labels:
verb: write verb: write
record: code:apiserver_request_total:increase30d record: code:apiserver_request_total:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n ) +\n (\n # read too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"LIST|GET\"}[30d]))\n -\n (\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"}\ - expr: 'sum by (cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count[1h]))
\ or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
'
record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h
- expr: 'sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30)
'
record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d
- expr: 'sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h]))
'
record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h
- expr: 'sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30)
'
record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"\
cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
labels: labels:
verb: all verb: all
record: apiserver_request:availability30d record: apiserver_request:availability30d
- expr: "1 - (\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30d]))\n -\n (\n # too slow\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n" - expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
labels: labels:
verb: read verb: read
record: apiserver_request:availability30d record: apiserver_request:availability30d
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n" - expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
labels: labels:
verb: write verb: write
record: apiserver_request:availability30d record: apiserver_request:availability30d
@ -757,19 +777,19 @@ spec:
record: node_namespace_pod_container:container_memory_cache record: node_namespace_pod_container:container_memory_cache
- expr: "container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n" - expr: "container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n"
record: node_namespace_pod_container:container_memory_swap record: node_namespace_pod_container:container_memory_swap
- expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n" - expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n" - expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_memory:kube_pod_container_resource_requests:sum record: namespace_memory:kube_pod_container_resource_requests:sum
- expr: "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n" - expr: "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n" - expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_requests:sum record: namespace_cpu:kube_pod_container_resource_requests:sum
- expr: "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n" - expr: "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n" - expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_memory:kube_pod_container_resource_limits:sum record: namespace_memory:kube_pod_container_resource_limits:sum
- expr: "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n" - expr: "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n"
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n" - expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_limits:sum record: namespace_cpu:kube_pod_container_resource_limits:sum
@ -785,6 +805,10 @@ spec:
labels: labels:
workload_type: statefulset workload_type: statefulset
record: namespace_workload_pod:kube_pod_owner:relabel record: namespace_workload_pod:kube_pod_owner:relabel
- expr: "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"Job\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n"
labels:
workload_type: job
record: namespace_workload_pod:kube_pod_owner:relabel
- name: kube-scheduler.rules - name: kube-scheduler.rules
rules: rules:
- expr: 'histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - expr: 'histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
@ -843,27 +867,33 @@ spec:
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- name: node.rules - name: node.rules
rules: rules:
- expr: "topk by(namespace, pod) (1,\n max by (node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n" - expr: "topk by(cluster, namespace, pod) (1,\n max by (cluster, node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n"
record: 'node_namespace_pod:kube_pod_info:' record: 'node_namespace_pod:kube_pod_info:'
- expr: "count by (cluster, node) (sum by (node, cpu) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n* on (namespace, pod) group_left(node)\n topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:)\n))\n" - expr: "count by (cluster, node) (sum by (node, cpu) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n* on (namespace, pod) group_left(node)\n topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:)\n))\n"
record: node:node_num_cpu:sum record: node:node_num_cpu:sum
- expr: "sum(\n node_memory_MemAvailable_bytes{job=\"node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"} +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n) by (cluster)\n" - expr: "sum(\n node_memory_MemAvailable_bytes{job=\"node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"} +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n) by (cluster)\n"
record: :node_memory_MemAvailable_bytes:sum record: :node_memory_MemAvailable_bytes:sum
- expr: 'sum(rate(node_cpu_seconds_total{job="node-exporter",mode!="idle",mode!="iowait",mode!="steal"}[5m])) /
count(sum(node_cpu_seconds_total{job="node-exporter"}) by (cluster, instance, cpu))
'
record: cluster:node_cpu:ratio_rate5m
- name: kubelet.rules - name: kubelet.rules
rules: rules:
- expr: 'histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - expr: 'histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
' '
labels: labels:
quantile: '0.99' quantile: '0.99'
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - expr: 'histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
' '
labels: labels:
quantile: '0.9' quantile: '0.9'
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - expr: 'histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
' '
labels: labels:

View File

@ -14,7 +14,7 @@ spec:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours. summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 40\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n"
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
@ -23,7 +23,7 @@ spec:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast. description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours. summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n" expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 10\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n"
for: 1h for: 1h
labels: labels:
severity: critical severity: critical
@ -188,13 +188,13 @@ spec:
severity: critical severity: critical
- name: node-exporter.rules - name: node-exporter.rules
rules: rules:
- expr: "count without (cpu) (\n count without (mode) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n )\n)\n" - expr: "count without (cpu, mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}\n)\n"
record: instance:node_num_cpu:sum record: instance:node_num_cpu:sum
- expr: "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[5m])\n)\n" - expr: "1 - avg without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n)\n"
record: instance:node_cpu_utilisation:rate5m record: instance:node_cpu_utilisation:rate5m
- expr: "(\n node_load1{job=\"node-exporter\"}\n/\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n" - expr: "(\n node_load1{job=\"node-exporter\"}\n/\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n"
record: instance:node_load1_per_cpu:ratio record: instance:node_load1_per_cpu:ratio
- expr: "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n" - expr: "1 - (\n (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n"
record: instance:node_memory_utilisation:ratio record: instance:node_memory_utilisation:ratio
- expr: 'rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) - expr: 'rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])

View File

@ -25,7 +25,7 @@ spec:
description: Errors while performing watch operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace. description: Errors while performing watch operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors
summary: Errors while performing watch operations in controller. summary: Errors while performing watch operations in controller.
expr: '(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4 expr: '(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.4
' '
for: 15m for: 15m
@ -69,7 +69,7 @@ spec:
description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace isn't ready to reconcile {{`{{`}} $labels.controller {{`}}`}} resources. description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace isn't ready to reconcile {{`{{`}} $labels.controller {{`}}`}} resources.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready
summary: Prometheus operator not ready summary: Prometheus operator not ready
expr: 'min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0) expr: 'min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0)
' '
for: 5m for: 5m
@ -86,4 +86,19 @@ spec:
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
- name: config-reloaders
rules:
- alert: ConfigReloaderSidecarErrors
annotations:
description: 'Errors encountered while the {{`{{`}}$labels.pod{{`}}`}} config-reloader sidecar attempts to sync config in {{`{{`}}$labels.namespace{{`}}`}} namespace.
As a result, configuration for service running in {{`{{`}}$labels.pod{{`}}`}} may be stale and cannot be updated anymore.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors
summary: config-reloader sidecar has not had a successful reload for 10m
expr: 'max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
'
for: 10m
labels:
severity: warning

View File

@ -181,6 +181,28 @@ spec:
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- alert: PrometheusScrapeBodySizeLimitHit
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured body_size_limit.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit
summary: Prometheus has dropped some targets that exceeded body size limit.
expr: 'increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
'
for: 15m
labels:
severity: warning
- alert: PrometheusScrapeSampleLimitHit
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured sample_limit.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit
summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
expr: 'increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
'
for: 15m
labels:
severity: warning
- alert: PrometheusTargetSyncFailure - alert: PrometheusTargetSyncFailure
annotations: annotations:
description: '{{`{{`}} printf "%.0f" $value {{`}}`}} targets in Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} have failed to sync because invalid configuration was supplied.' description: '{{`{{`}} printf "%.0f" $value {{`}}`}} targets in Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} have failed to sync because invalid configuration was supplied.'

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-storage name: kubezero-storage
description: KubeZero umbrella chart for all things storage incl. AWS EBS/EFS, openEBS-lvm, gemini description: KubeZero umbrella chart for all things storage incl. AWS EBS/EFS, openEBS-lvm, gemini
type: application type: application
version: 0.7.0 version: 0.7.1
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords: keywords:
@ -28,7 +28,7 @@ dependencies:
condition: gemini.enabled condition: gemini.enabled
# repository: https://charts.fairwinds.com/stable # repository: https://charts.fairwinds.com/stable
- name: aws-ebs-csi-driver - name: aws-ebs-csi-driver
version: 2.10.1 version: 2.11.0
condition: aws-ebs-csi-driver.enabled condition: aws-ebs-csi-driver.enabled
# repository: https://kubernetes-sigs.github.io/aws-ebs-csi-driver # repository: https://kubernetes-sigs.github.io/aws-ebs-csi-driver
- name: aws-efs-csi-driver - name: aws-efs-csi-driver

View File

@ -1,9 +1,15 @@
# Helm chart # Helm chart
# v2.11.0
* Bump app/driver to version `v1.11.3`
* Add support for leader election tuning for `csi-provisioner` and `csi-attacher` ([#1371](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/1371), [@moogzy](https://github.com/moogzy))
* Change `fsGroupPolicy` to `File` ([#1377](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/1377), [@ConnorJC3](https://github.com/ConnorJC3))
* Allow all taint for `csi-node` by default ([#1381](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/1381), [@gtxu](https://github.com/gtxu))
## v2.10.1 ## v2.10.1
* Bump app/driver to version `v1.11.2` * Bump app/driver to version `v1.11.2`
## 2.10.0 ## v2.10.0
* Implement securityContext for containers * Implement securityContext for containers
* Add securityContext for node pod * Add securityContext for node pod
* Utilize more secure defaults for securityContext * Utilize more secure defaults for securityContext

View File

@ -3,7 +3,7 @@ annotations:
- kind: added - kind: added
description: Custom controller.updateStrategy to set controller deployment strategy. description: Custom controller.updateStrategy to set controller deployment strategy.
apiVersion: v2 apiVersion: v2
appVersion: 1.11.2 appVersion: 1.11.3
description: A Helm chart for AWS EBS CSI Driver description: A Helm chart for AWS EBS CSI Driver
home: https://github.com/kubernetes-sigs/aws-ebs-csi-driver home: https://github.com/kubernetes-sigs/aws-ebs-csi-driver
keywords: keywords:
@ -19,4 +19,4 @@ maintainers:
name: aws-ebs-csi-driver name: aws-ebs-csi-driver
sources: sources:
- https://github.com/kubernetes-sigs/aws-ebs-csi-driver - https://github.com/kubernetes-sigs/aws-ebs-csi-driver
version: 2.10.1 version: 2.11.0

View File

@ -170,7 +170,18 @@ spec:
{{- if .Values.controller.extraCreateMetadata }} {{- if .Values.controller.extraCreateMetadata }}
- --extra-create-metadata - --extra-create-metadata
{{- end}} {{- end}}
- --leader-election=true - --leader-election={{ .Values.sidecars.provisioner.leaderElection.enabled | required "leader election state for csi-provisioner is required, must be set to true || false." }}
{{- if .Values.sidecars.provisioner.leaderElection.enabled }}
{{- if .Values.sidecars.provisioner.leaderElection.leaseDuration }}
- --leader-election-lease-duration={{ .Values.sidecars.provisioner.leaderElection.leaseDuration }}
{{- end }}
{{- if .Values.sidecars.provisioner.leaderElection.renewDeadline}}
- --leader-election-renew-deadline={{ .Values.sidecars.provisioner.leaderElection.renewDeadline }}
{{- end }}
{{- if .Values.sidecars.provisioner.leaderElection.retryPeriod }}
- --leader-election-retry-period={{ .Values.sidecars.provisioner.leaderElection.retryPeriod }}
{{- end }}
{{- end }}
- --default-fstype={{ .Values.controller.defaultFsType }} - --default-fstype={{ .Values.controller.defaultFsType }}
env: env:
- name: ADDRESS - name: ADDRESS
@ -202,7 +213,18 @@ spec:
args: args:
- --csi-address=$(ADDRESS) - --csi-address=$(ADDRESS)
- --v={{ .Values.sidecars.attacher.logLevel }} - --v={{ .Values.sidecars.attacher.logLevel }}
- --leader-election=true - --leader-election={{ .Values.sidecars.attacher.leaderElection.enabled | required "leader election state for csi-attacher is required, must be set to true || false." }}
{{- if .Values.sidecars.attacher.leaderElection.enabled }}
{{- if .Values.sidecars.attacher.leaderElection.leaseDuration }}
- --leader-election-lease-duration={{ .Values.sidecars.attacher.leaderElection.leaseDuration }}
{{- end }}
{{- if .Values.sidecars.attacher.leaderElection.renewDeadline}}
- --leader-election-renew-deadline={{ .Values.sidecars.attacher.leaderElection.renewDeadline }}
{{- end }}
{{- if .Values.sidecars.attacher.leaderElection.retryPeriod }}
- --leader-election-retry-period={{ .Values.sidecars.attacher.leaderElection.retryPeriod }}
{{- end }}
{{- end }}
env: env:
- name: ADDRESS - name: ADDRESS
value: /var/lib/csi/sockets/pluginproxy/csi.sock value: /var/lib/csi/sockets/pluginproxy/csi.sock

View File

@ -7,3 +7,4 @@ metadata:
spec: spec:
attachRequired: true attachRequired: true
podInfoOnMount: false podInfoOnMount: false
fsGroupPolicy: File

View File

@ -40,8 +40,6 @@ spec:
{{- if .Values.node.tolerateAllTaints }} {{- if .Values.node.tolerateAllTaints }}
- operator: Exists - operator: Exists
{{- else }} {{- else }}
- key: CriticalAddonsOnly
operator: Exists
- operator: Exists - operator: Exists
effect: NoExecute effect: NoExecute
tolerationSeconds: 300 tolerationSeconds: 300

View File

@ -40,8 +40,6 @@ spec:
{{- if .Values.node.tolerateAllTaints }} {{- if .Values.node.tolerateAllTaints }}
- operator: Exists - operator: Exists
{{- else }} {{- else }}
- key: CriticalAddonsOnly
operator: Exists
- operator: Exists - operator: Exists
effect: NoExecute effect: NoExecute
tolerationSeconds: 300 tolerationSeconds: 300

View File

@ -22,6 +22,16 @@ sidecars:
tag: "v3.1.0" tag: "v3.1.0"
logLevel: 2 logLevel: 2
resources: {} resources: {}
# Tune leader lease election for csi-provisioner.
# Leader election is on by default.
leaderElection:
enabled: true
# Optional values to tune lease behavior.
# The arguments provided must be in an acceptable time.ParseDuration format.
# Ref: https://pkg.go.dev/flag#Duration
# leaseDuration: "15s"
# renewDeadline: "10s"
# retryPeriod: "5s"
securityContext: securityContext:
readOnlyRootFilesystem: true readOnlyRootFilesystem: true
allowPrivilegeEscalation: false allowPrivilegeEscalation: false
@ -31,6 +41,16 @@ sidecars:
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
repository: k8s.gcr.io/sig-storage/csi-attacher repository: k8s.gcr.io/sig-storage/csi-attacher
tag: "v3.4.0" tag: "v3.4.0"
# Tune leader lease election for csi-attacher.
# Leader election is on by default.
leaderElection:
enabled: true
# Optional values to tune lease behavior.
# The arguments provided must be in an acceptable time.ParseDuration format.
# Ref: https://pkg.go.dev/flag#Duration
# leaseDuration: "15s"
# renewDeadline: "10s"
# retryPeriod: "5s"
logLevel: 2 logLevel: 2
resources: {} resources: {}
securityContext: securityContext:
@ -183,7 +203,7 @@ node:
nodeSelector: {} nodeSelector: {}
podAnnotations: {} podAnnotations: {}
podLabels: {} podLabels: {}
tolerateAllTaints: false tolerateAllTaints: true
tolerations: [] tolerations: []
resources: {} resources: {}
serviceAccount: serviceAccount:

View File

@ -18,7 +18,7 @@
"subdir": "contrib/mixin" "subdir": "contrib/mixin"
} }
}, },
"version": "74aa38ec10bc22d34ffd204f46df6e460b78d855", "version": "19002cfc689fba2b8f56605e5797bf79f8b61fdd",
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc=" "sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
}, },
{ {
@ -38,7 +38,7 @@
"subdir": "grafana-builder" "subdir": "grafana-builder"
} }
}, },
"version": "c132c4afcf17491718539db4c2d94c0ea4346120", "version": "dbf6fc14105c28b6fd0253005f7ca2da37d3d4e1",
"sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0=" "sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
}, },
{ {
@ -58,7 +58,7 @@
"subdir": "lib/promgrafonnet" "subdir": "lib/promgrafonnet"
} }
}, },
"version": "eb98d4f74e8ac9c30b1f0e815b07bed31da76c8f", "version": "5e44626d70c2bf2d35c37f3fee5a6261a5335cc6",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps=" "sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
}, },
{ {

View File

@ -63,11 +63,6 @@ gemini:
aws-ebs-csi-driver: aws-ebs-csi-driver:
enabled: false enabled: false
# starting with 1.6 the ebs-plugin panics with "could not get number of attached ENIs"
# somewhere related to metadata / volumeattach limits and nitro instances ... AWS as usual
#image:
# tag: v1.5.3
controller: controller:
replicaCount: 1 replicaCount: 1
logLevel: 2 logLevel: 2

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero name: kubezero
description: KubeZero - Root App of Apps chart description: KubeZero - Root App of Apps chart
type: application type: application
version: 1.23.10-3 version: 1.23.11
home: https://kubezero.com home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords: keywords:

View File

@ -68,4 +68,4 @@ Kubernetes: `>= 1.20.0`
| storage.targetRevision | string | `"0.7.0"` | | | storage.targetRevision | string | `"0.7.0"` | |
---------------------------------------------- ----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.9.1](https://github.com/norwoodj/helm-docs/releases/v1.9.1) Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)

View File

@ -22,7 +22,7 @@ cert-manager:
storage: storage:
enabled: false enabled: false
targetRevision: 0.7.0 targetRevision: 0.7.1
aws-ebs-csi-driver: aws-ebs-csi-driver:
enabled: false enabled: false
aws-efs-csi-driver: aws-efs-csi-driver:
@ -52,7 +52,7 @@ istio-private-ingress:
metrics: metrics:
enabled: false enabled: false
namespace: monitoring namespace: monitoring
targetRevision: 0.8.1 targetRevision: 0.8.4
istio: istio:
grafana: {} grafana: {}
prometheus: {} prometheus: {}

View File

@ -29,4 +29,4 @@ Kubernetes: `>= 1.20.0`
| manticoresearch.worker.volume.size | string | `"4Gi"` | | | manticoresearch.worker.volume.size | string | `"4Gi"` | |
---------------------------------------------- ----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.9.1](https://github.com/norwoodj/helm-docs/releases/v1.9.1) Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)