feat: another round of upgrade to 1.23.11

parent 1f7971220e
commit c2b929f952
@@ -133,6 +133,9 @@ control_plane_upgrade kubeadm_upgrade

kubectl delete ds kube-multus-ds -n kube-system

# Required due to chart upgrade to 4.X as part of prometheus-stack 40.X
kubectl delete daemonset metrics-prometheus-node-exporter -n monitoring

control_plane_upgrade "apply_network, apply_addons"

kubectl rollout restart daemonset/calico-node -n kube-system
@@ -39,4 +39,4 @@ Kubernetes: `>= 1.18.0`
| service.port | int | `3310` | The port to be used by the clamav service |

----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.9.1](https://github.com/norwoodj/helm-docs/releases/v1.9.1)
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
@@ -2,7 +2,7 @@ apiVersion: v2
name: kubeadm
description: KubeZero Kubeadm cluster config
type: application
version: 1.23.10
version: 1.23.11
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@@ -3,7 +3,7 @@ name: kubezero-addons
description: KubeZero umbrella chart for various optional cluster addons
type: application
version: 0.6.2
appVersion: v1.23.10
appVersion: v1.23.11
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-metrics
description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
type: application
version: 0.8.1
version: 0.8.4
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@@ -18,7 +18,7 @@ dependencies:
    version: ">= 0.1.5"
    repository: https://cdn.zero-downtime.net/charts/
  - name: kube-prometheus-stack
    version: 39.9.0
    version: 40.0.0
    # Switch back to upstream once all alerts are fixed eg. etcd gpcr
    # repository: https://prometheus-community.github.io/helm-charts
  - name: prometheus-adapter
@@ -6,20 +6,20 @@ annotations:
    url: https://github.com/prometheus-operator/kube-prometheus
  artifacthub.io/operator: "true"
apiVersion: v2
appVersion: 0.58.0
appVersion: 0.59.1
dependencies:
- condition: kubeStateMetrics.enabled
  name: kube-state-metrics
  repository: https://prometheus-community.github.io/helm-charts
  version: 4.15.*
  version: 4.18.*
- condition: nodeExporter.enabled
  name: prometheus-node-exporter
  repository: https://prometheus-community.github.io/helm-charts
  version: 3.3.*
  version: 4.2.*
- condition: grafana.enabled
  name: grafana
  repository: https://grafana.github.io/helm-charts
  version: 6.32.*
  version: 6.38.*
description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards,
  and Prometheus rules combined with documentation and scripts to provide easy to
  operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus

@@ -51,4 +51,4 @@ sources:
- https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus
type: application
version: 39.9.0
version: 40.0.0
@@ -80,6 +80,33 @@ _See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._

A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions.

### From 39.x to 40.x

This version upgrades Prometheus-Operator to v0.59.1, Prometheus to v2.38.0, kube-state-metrics to v2.6.0 and Thanos to v0.28.0.
This version also upgrades the Helm charts of kube-state-metrics to 4.18.0 and prometheus-node-exporter to 4.2.0.

Run these commands to update the CRDs before applying the upgrade.

```console
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
```

Starting from prometheus-node-exporter version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade.

```console
kubectl delete daemonset -l app=prometheus-node-exporter
helm upgrade -i kube-prometheus-stack prometheus-community/kube-prometheus-stack
```

If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please ensure to upgrade their `selector` fields accordingly to the new labels.

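As a hedged illustration (not part of the upstream README): a custom ServiceMonitor that matched the old node-exporter labels would need its `selector` updated roughly as below; the release name `metrics` is a hypothetical placeholder.

```yaml
# Before 40.x: the pre-4.x prometheus-node-exporter labels
selector:
  matchLabels:
    app: prometheus-node-exporter
    release: metrics   # hypothetical release name

# After 40.x: the Kubernetes recommended labels
selector:
  matchLabels:
    app.kubernetes.io/name: prometheus-node-exporter
    app.kubernetes.io/instance: metrics   # hypothetical release name
```
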
### From 38.x to 39.x

This upgraded prometheus-operator to v0.58.0 and prometheus to v2.37.0
@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 9.0.5
appVersion: 9.1.4
description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net
icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png

@@ -19,4 +19,4 @@ name: grafana
sources:
- https://github.com/grafana/grafana
type: application
version: 6.32.10
version: 6.38.0
@@ -67,6 +67,7 @@ This version requires Helm >= 3.1.0.
| `service.type` | Kubernetes service type | `ClusterIP` |
| `service.port` | Kubernetes port where service is exposed | `80` |
| `service.portName` | Name of the port on the service | `service` |
| `service.appProtocol` | Adds the appProtocol field to the service | `` |
| `service.targetPort` | Internal service is port | `3000` |
| `service.nodePort` | Kubernetes service nodePort | `nil` |
| `service.annotations` | Service annotations (can be templated) | `{}` |

@@ -127,6 +128,7 @@ This version requires Helm >= 3.1.0.
| `extraEmptyDirMounts` | Additional grafana server emptyDir volume mounts | `[]` |
| `plugins` | Plugins to be loaded along with Grafana | `[]` |
| `datasources` | Configure grafana datasources (passed through tpl) | `{}` |
| `alerting` | Configure grafana alerting (passed through tpl) | `{}` |
| `notifiers` | Configure grafana notifiers | `{}` |
| `dashboardProviders` | Configure grafana dashboard providers | `{}` |
| `dashboards` | Dashboards to import | `{}` |

@@ -233,13 +235,14 @@ This version requires Helm >= 3.1.0.
| `imageRenderer.priorityClassName` | image-renderer deployment priority class | `''` |
| `imageRenderer.service.enabled` | Enable the image-renderer service | `true` |
| `imageRenderer.service.portName` | image-renderer service port name | `http` |
| `imageRenderer.service.port` | image-renderer service port used by both service and deployment | `8081` |
| `imageRenderer.grafanaProtocol` | Protocol to use for image renderer callback url | `http` |
| `imageRenderer.service.port` | image-renderer port used by deployment | `8081` |
| `imageRenderer.service.targetPort` | image-renderer service port used by service | `8081` |
| `imageRenderer.appProtocol` | Adds the appProtocol field to the service | `` |
| `imageRenderer.grafanaSubPath` | Grafana sub path to use for image renderer callback url | `''` |
| `imageRenderer.podPortName` | name of the image-renderer port on the pod | `http` |
| `imageRenderer.revisionHistoryLimit` | number of image-renderer replica sets to keep | `10` |
| `imageRenderer.networkPolicy.limitIngress` | Enable a NetworkPolicy to limit inbound traffic from only the created grafana pods | `true` |
| `imageRenderer.networkPolicy.limitEgress` | Enable a NetworkPolicy to limit outbound traffic to only the created grafana pods | `false` |
| `imageRenderer.networkPolicy.limitIngress` | Enable a NetworkPolicy to limit inbound traffic from only the created grafana pods | `true` |
| `imageRenderer.networkPolicy.limitEgress` | Enable a NetworkPolicy to limit outbound traffic to only the created grafana pods | `false` |
| `imageRenderer.resources` | Set resource limits for image-renderer pods | `{}` |
| `imageRenderer.nodeSelector` | Node labels for pod assignment | `{}` |
| `imageRenderer.tolerations` | Toleration labels for pod assignment | `[]` |

@@ -273,7 +276,7 @@ ingress:
### Example of extraVolumeMounts

Volume can be type persistentVolumeClaim or hostPath but not both at same time.
If none existingClaim or hostPath argument is givent then type is emptyDir.
If neither existingClaim or hostPath argument is given then type is emptyDir.

```yaml
- extraVolumeMounts:

@@ -482,7 +485,7 @@ grafana.ini:

## How to securely reference secrets in grafana.ini

This example uses Grafana uses [file providers](https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider) for secret values and the `extraSecretMounts` configuration flag (Additional grafana server secret mounts) to mount the secrets.
This example uses Grafana [file providers](https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider) for secret values and the `extraSecretMounts` configuration flag (Additional grafana server secret mounts) to mount the secrets.

In grafana.ini:
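The hunk's context ends here; as a hedged sketch only (not part of the diff), a file-provider reference in the chart's `grafana.ini` values typically looks like this, with a hypothetical secret mount path from `extraSecretMounts`:

```yaml
# values.yaml sketch (hypothetical path)
grafana.ini:
  database:
    password: $__file{/etc/secrets/db-password}
```
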
@@ -141,22 +141,11 @@ Return the appropriate apiVersion for ingress.
{{- end -}}
{{- end -}}

{{/*
Return the appropriate apiVersion for podSecurityPolicy.
*/}}
{{- define "grafana.podSecurityPolicy.apiVersion" -}}
{{- if and (.Capabilities.APIVersions.Has "policy/v1beta1") (semverCompare ">= 1.16-0" .Capabilities.KubeVersion.Version) -}}
{{- print "policy/v1beta1" -}}
{{- else -}}
{{- print "extensions/v1beta1" -}}
{{- end -}}
{{- end -}}

{{/*
Return the appropriate apiVersion for podDisruptionBudget.
*/}}
{{- define "grafana.podDisruptionBudget.apiVersion" -}}
{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.21-0" .Capabilities.KubeVersion.Version) -}}
{{- if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" -}}
{{- print "policy/v1" -}}
{{- else -}}
{{- print "policy/v1beta1" -}}
@@ -1,17 +1,16 @@

{{- define "grafana.pod" -}}
{{- if .Values.schedulerName }}
schedulerName: "{{ .Values.schedulerName }}"
{{- end }}
serviceAccountName: {{ template "grafana.serviceAccountName" . }}
automountServiceAccountToken: {{ .Values.serviceAccount.autoMount }}
{{- if .Values.securityContext }}
{{- with .Values.securityContext }}
securityContext:
{{ toYaml .Values.securityContext | indent 2 }}
{{- toYaml . | nindent 2 }}
{{- end }}
{{- if .Values.hostAliases }}
{{- with .Values.hostAliases }}
hostAliases:
{{ toYaml .Values.hostAliases | indent 2 }}
{{- toYaml . | nindent 2 }}
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}

@@ -31,8 +30,10 @@ initContainers:
runAsNonRoot: false
runAsUser: 0
command: ["chown", "-R", "{{ .Values.securityContext.runAsUser }}:{{ .Values.securityContext.runAsGroup }}", "/var/lib/grafana"]
{{- with .Values.initChownData.resources }}
resources:
{{ toYaml .Values.initChownData.resources | indent 6 }}
{{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts:
- name: storage
mountPath: "/var/lib/grafana"

@@ -50,13 +51,19 @@ initContainers:
imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }}
command: ["/bin/sh"]
args: [ "-c", "mkdir -p /var/lib/grafana/dashboards/default && /bin/sh -x /etc/grafana/download_dashboards.sh" ]
{{- with .Values.downloadDashboards.resources }}
resources:
{{ toYaml .Values.downloadDashboards.resources | indent 6 }}
{{- toYaml . | nindent 6 }}
{{- end }}
env:
{{- range $key, $value := .Values.downloadDashboards.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- with .Values.downloadDashboards.securityContext }}
securityContext:
{{- toYaml . | nindent 6 }}
{{- end }}
{{- if .Values.downloadDashboards.envFromSecret }}
envFrom:
- secretRef:

@@ -86,6 +93,14 @@ initContainers:
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
{{- range $key, $value := .Values.sidecar.datasources.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.datasources.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD
value: "LIST"
- name: LABEL

@@ -94,6 +109,10 @@ initContainers:
- name: LABEL_VALUE
value: {{ quote .Values.sidecar.datasources.labelValue }}
{{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }}
{{- end }}
- name: FOLDER
value: "/etc/grafana/provisioning/datasources"
- name: RESOURCE

@@ -110,12 +129,14 @@ initContainers:
- name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}"
{{- end }}
{{- with .Values.sidecar.resources }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-datasources-volume
mountPath: "/etc/grafana/provisioning/datasources"
@@ -129,10 +150,26 @@ initContainers:
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
{{- range $key, $value := .Values.sidecar.notifiers.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.notifiers.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD
value: LIST
- name: LABEL
value: "{{ .Values.sidecar.notifiers.label }}"
{{- if .Values.sidecar.notifiers.labelValue }}
- name: LABEL_VALUE
value: {{ quote .Values.sidecar.notifiers.labelValue }}
{{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.notifiers.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.notifiers.logLevel }}
{{- end }}
- name: FOLDER
value: "/etc/grafana/provisioning/notifiers"
- name: RESOURCE

@@ -149,20 +186,22 @@ initContainers:
- name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}"
{{- end }}
{{- if .Values.sidecar.livenessProbe }}
{{- with .Values.sidecar.livenessProbe }}
livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }}
{{- end }}
{{- if .Values.sidecar.readinessProbe }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.readinessProbe }}
readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.resources }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-notifiers-volume
mountPath: "/etc/grafana/provisioning/notifiers"
@@ -190,6 +229,14 @@ containers:
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
{{- range $key, $value := .Values.sidecar.dashboards.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.dashboards.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD
value: {{ .Values.sidecar.dashboards.watchMethod }}
- name: LABEL

@@ -198,9 +245,9 @@ containers:
- name: LABEL_VALUE
value: {{ quote .Values.sidecar.dashboards.labelValue }}
{{- end }}
{{- if .Values.sidecar.logLevel }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.dashboards.logLevel }}
- name: LOG_LEVEL
value: {{ quote .Values.sidecar.logLevel }}
value: {{ default .Values.sidecar.logLevel .Values.sidecar.dashboards.logLevel }}
{{- end }}
- name: FOLDER
value: "{{ .Values.sidecar.dashboards.folder }}{{- with .Values.sidecar.dashboards.defaultFolderName }}/{{ . }}{{- end }}"

@@ -227,27 +274,35 @@ containers:
value: "{{ .Values.sidecar.dashboards.script }}"
{{- end }}
{{- if .Values.sidecar.dashboards.watchServerTimeout }}
{{- if ne .Values.sidecar.dashboards.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.dashboards.watchServerTimeout with .Values.sidecar.dashboards.watchMethod %s" .Values.sidecar.dashboards.watchMethod) }}
{{- end }}
- name: WATCH_SERVER_TIMEOUT
value: "{{ .Values.sidecar.dashboards.watchServerTimeout }}"
{{- end }}
{{- if .Values.sidecar.dashboards.watchClientTimeout }}
{{- if ne .Values.sidecar.dashboards.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.dashboards.watchClientTimeout with .Values.sidecar.dashboards.watchMethod %s" .Values.sidecar.dashboards.watchMethod) }}
{{- end }}
- name: WATCH_CLIENT_TIMEOUT
value: "{{ .Values.sidecar.dashboards.watchClientTimeout }}"
{{- end }}
{{- if .Values.sidecar.livenessProbe }}
{{- with .Values.sidecar.livenessProbe }}
livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }}
{{- end }}
{{- if .Values.sidecar.readinessProbe }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.readinessProbe }}
readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.resources }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-dashboard-volume
mountPath: {{ .Values.sidecar.dashboards.folder | quote }}
@@ -264,6 +319,14 @@ containers:
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
{{- range $key, $value := .Values.sidecar.datasources.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.datasources.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD
value: {{ .Values.sidecar.datasources.watchMethod }}
- name: LABEL

@@ -272,6 +335,10 @@ containers:
- name: LABEL_VALUE
value: {{ quote .Values.sidecar.datasources.labelValue }}
{{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }}
{{- end }}
- name: FOLDER
value: "/etc/grafana/provisioning/datasources"
- name: RESOURCE

@@ -288,6 +355,10 @@ containers:
- name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}"
{{- end }}
{{- if .Values.sidecar.datasources.script }}
- name: SCRIPT
value: "{{ .Values.sidecar.datasources.script }}"
{{- end }}
{{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }}
- name: REQ_USERNAME
valueFrom:

@@ -308,20 +379,36 @@ containers:
- name: REQ_METHOD
value: POST
{{- end }}
{{- if .Values.sidecar.livenessProbe }}
{{- if .Values.sidecar.datasources.watchServerTimeout }}
{{- if ne .Values.sidecar.datasources.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.datasources.watchServerTimeout with .Values.sidecar.datasources.watchMethod %s" .Values.sidecar.datasources.watchMethod) }}
{{- end }}
- name: WATCH_SERVER_TIMEOUT
value: "{{ .Values.sidecar.datasources.watchServerTimeout }}"
{{- end }}
{{- if .Values.sidecar.datasources.watchClientTimeout }}
{{- if ne .Values.sidecar.datasources.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.datasources.watchClientTimeout with .Values.sidecar.datasources.watchMethod %s" .Values.sidecar.datasources.watchMethod) }}
{{- end }}
- name: WATCH_CLIENT_TIMEOUT
value: "{{ .Values.sidecar.datasources.watchClientTimeout }}"
{{- end }}
{{- with .Values.sidecar.livenessProbe }}
livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }}
{{- end }}
{{- if .Values.sidecar.readinessProbe }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.readinessProbe }}
readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.resources }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-datasources-volume
mountPath: "/etc/grafana/provisioning/datasources"
@@ -335,6 +422,14 @@ containers:
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
{{- range $key, $value := .Values.sidecar.plugins.env }}
- name: "{{ $key }}"
value: "{{ $value }}"
{{- end }}
{{- if .Values.sidecar.plugins.ignoreAlreadyProcessed }}
- name: IGNORE_ALREADY_PROCESSED
value: "true"
{{- end }}
- name: METHOD
value: {{ .Values.sidecar.plugins.watchMethod }}
- name: LABEL

@@ -343,6 +438,10 @@ containers:
- name: LABEL_VALUE
value: {{ quote .Values.sidecar.plugins.labelValue }}
{{- end }}
{{- if or .Values.sidecar.logLevel .Values.sidecar.plugins.logLevel }}
- name: LOG_LEVEL
value: {{ default .Values.sidecar.logLevel .Values.sidecar.plugins.logLevel }}
{{- end }}
- name: FOLDER
value: "/etc/grafana/provisioning/plugins"
- name: RESOURCE

@@ -355,6 +454,10 @@ containers:
- name: NAMESPACE
value: "{{ .Values.sidecar.plugins.searchNamespace | join "," }}"
{{- end }}
{{- if .Values.sidecar.plugins.script }}
- name: SCRIPT
value: "{{ .Values.sidecar.plugins.script }}"
{{- end }}
{{- if .Values.sidecar.skipTlsVerify }}
- name: SKIP_TLS_VERIFY
value: "{{ .Values.sidecar.skipTlsVerify }}"

@@ -379,20 +482,36 @@ containers:
- name: REQ_METHOD
value: POST
{{- end }}
{{- if .Values.sidecar.livenessProbe }}
{{- if .Values.sidecar.plugins.watchServerTimeout }}
{{- if ne .Values.sidecar.plugins.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.plugins.watchServerTimeout with .Values.sidecar.plugins.watchMethod %s" .Values.sidecar.plugins.watchMethod) }}
{{- end }}
- name: WATCH_SERVER_TIMEOUT
value: "{{ .Values.sidecar.plugins.watchServerTimeout }}"
{{- end }}
{{- if .Values.sidecar.plugins.watchClientTimeout }}
{{- if ne .Values.sidecar.plugins.watchMethod "WATCH" }}
{{- fail (printf "Cannot use .Values.sidecar.plugins.watchClientTimeout with .Values.sidecar.plugins.watchMethod %s" .Values.sidecar.plugins.watchMethod) }}
{{- end }}
- name: WATCH_CLIENT_TIMEOUT
value: "{{ .Values.sidecar.plugins.watchClientTimeout }}"
{{- end }}
{{- with .Values.sidecar.livenessProbe }}
livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }}
{{- end }}
{{- if .Values.sidecar.readinessProbe }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.readinessProbe }}
readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.resources }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-plugins-volume
mountPath: "/etc/grafana/provisioning/plugins"
@@ -410,10 +529,10 @@ containers:
- {{ . }}
{{- end }}
{{- end}}
{{- if .Values.containerSecurityContext }}
{{- with .Values.containerSecurityContext }}
securityContext:
{{- toYaml .Values.containerSecurityContext | nindent 6 }}
{{- end }}
{{- toYaml . | nindent 6 }}
{{- end }}
volumeMounts:
- name: config
mountPath: "/etc/grafana/grafana.ini"

@@ -466,6 +585,13 @@ containers:
subPath: {{ . | quote }}
{{- end }}
{{- end }}
{{- if .Values.alerting }}
{{- range (keys .Values.alerting | sortAlpha) }}
- name: config
mountPath: "/etc/grafana/provisioning/alerting/{{ . }}"
subPath: {{ . | quote }}
{{- end }}
{{- end }}
{{- if .Values.dashboardProviders }}
{{- range (keys .Values.dashboardProviders | sortAlpha) }}
- name: config

@@ -511,11 +637,8 @@ containers:
mountPath: {{ .mountPath }}
{{- end }}
ports:
- name: {{ .Values.service.portName }}
containerPort: {{ .Values.service.port }}
protocol: TCP
- name: {{ .Values.podPortName }}
containerPort: 3000
containerPort: {{ .Values.service.targetPort }}
protocol: TCP
env:
{{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }}

@@ -595,30 +718,40 @@ containers:
optional: {{ .optional | default false }}
{{- end }}
{{- end }}
{{- with .Values.livenessProbe }}
livenessProbe:
{{ toYaml .Values.livenessProbe | indent 6 }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.readinessProbe }}
readinessProbe:
{{ toYaml .Values.readinessProbe | indent 6 }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- if .Values.lifecycleHooks }}
lifecycle: {{ tpl (.Values.lifecycleHooks | toYaml) . | nindent 6 }}
{{- end }}
{{- with .Values.resources }}
resources:
{{ toYaml .Values.resources | indent 6 }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.extraContainers }}
{{ tpl . $ | indent 2 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{ toYaml . | indent 2 }}
{{- toYaml . | nindent 2 }}
{{- end }}
{{- $root := . }}
{{- with .Values.affinity }}
affinity:
{{ tpl (toYaml .) $root | indent 2 }}
{{- end }}
{{- with .Values.topologySpreadConstraints }}
topologySpreadConstraints:
{{- toYaml . | nindent 2 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{ toYaml . | indent 2 }}
{{- toYaml . | nindent 2 }}
{{- end }}
volumes:
- name: config

@@ -744,6 +877,10 @@ volumes:
{{- else if .hostPath }}
hostPath:
path: {{ .hostPath }}
{{- else if .csi }}
csi:
data:
{{ toYaml .data | nindent 6 }}
{{- else }}
emptyDir: {}
{{- end }}

@@ -753,6 +890,6 @@ volumes:
emptyDir: {}
{{- end -}}
{{- if .Values.extraContainerVolumes }}
{{ toYaml .Values.extraContainerVolumes | indent 2 }}
{{ tpl (toYaml .Values.extraContainerVolumes) . | indent 2 }}
{{- end }}
{{- end }}
@@ -56,6 +56,14 @@ data:
{{- end -}}
{{- end -}}

{{- if .Values.alerting }}
{{ $root := . }}
{{- range $key, $value := .Values.alerting }}
{{ $key }}: |
{{ tpl $value $root | indent 4 }}
{{- end -}}
{{- end -}}

{{- if .Values.dashboardProviders }}
{{- range $key, $value := .Values.dashboardProviders }}
{{ $key }}: |

@@ -86,6 +94,12 @@ data:
{{- if $value.token }}
-H "Authorization: token {{ $value.token }}" \
{{- end }}
{{- if $value.bearerToken }}
-H "Authorization: Bearer {{ $value.bearerToken }}" \
{{- end }}
{{- if $value.gitlabToken }}
-H "PRIVATE-TOKEN: {{ $value.gitlabToken }}" \
{{- end }}
-H "Content-Type: application/json;charset=UTF-8" \
{{ end }}
{{- $dpPath := "" -}}
@@ -46,5 +46,5 @@ spec:
{{ toYaml . | indent 8 }}
{{- end }}
spec:
{{- include "grafana.pod" . | nindent 6 }}
{{- include "grafana.pod" . | indent 6 }}
{{- end }}
@@ -18,5 +18,5 @@ spec:
ports:
- protocol: TCP
port: 3000
targetPort: 3000
targetPort: {{ .Values.service.targetPort }}
{{- end }}
@@ -77,7 +77,7 @@ spec:
{{- end}}
ports:
- name: {{ .Values.imageRenderer.service.portName }}
containerPort: {{ .Values.imageRenderer.service.port }}
containerPort: {{ .Values.imageRenderer.service.targetPort }}
protocol: TCP
livenessProbe:
httpGet:

@@ -85,7 +85,7 @@ spec:
port: {{ .Values.imageRenderer.service.portName }}
env:
- name: HTTP_PORT
value: {{ .Values.imageRenderer.service.port | quote }}
value: {{ .Values.imageRenderer.service.targetPort | quote }}
{{- range $key, $value := .Values.imageRenderer.env }}
- name: {{ $key | quote }}
value: {{ $value | quote }}
@@ -19,7 +19,7 @@ spec:
- Ingress
ingress:
- ports:
- port: {{ .Values.imageRenderer.service.port }}
- port: {{ .Values.imageRenderer.service.targetPort }}
protocol: TCP
from:
- namespaceSelector:
@@ -24,6 +24,9 @@ spec:
port: {{ .Values.imageRenderer.service.port }}
protocol: TCP
targetPort: {{ .Values.imageRenderer.service.targetPort }}
{{- if .Values.imageRenderer.appProtocol }}
appProtocol: {{ .Values.imageRenderer.appProtocol }}
{{- end }}
selector:
{{- include "grafana.imageRenderer.selectorLabels" . | nindent 4 }}
{{ end }}
@@ -6,13 +6,13 @@ metadata:
namespace: {{ template "grafana.namespace" . }}
labels:
{{- include "grafana.labels" . | nindent 4 }}
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 4 }}
{{- end }}
{{- with .Values.annotations }}
{{- with .Values.labels }}
{{ toYaml . | nindent 4 }}
{{- end }}
{{- with .Values.annotations }}
annotations:
{{ toYaml . | indent 4 }}
{{- end }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
policyTypes:
{{- if .Values.networkPolicy.ingress }}
@@ -39,10 +39,10 @@ spec:
- podSelector:
matchLabels:
{{ template "grafana.fullname" . }}-client: "true"
{{- if .Values.networkPolicy.explicitNamespacesSelector }}
namespaceSelector:
{{ toYaml .Values.networkPolicy.explicitNamespacesSelector | indent 12 }}
{{- end }}
{{- with .Values.networkPolicy.explicitNamespacesSelector }}
- namespaceSelector:
{{- toYaml . | nindent 12 }}
{{- end }}
- podSelector:
matchLabels:
{{- include "grafana.labels" . | nindent 14 }}
@@ -1,5 +1,6 @@
{{- if .Values.rbac.pspEnabled }}
apiVersion: {{ include "grafana.podSecurityPolicy.apiVersion" . }}
{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: {{ template "grafana.fullname" . }}

@@ -47,3 +48,4 @@ spec:
max: 65535
readOnlyRootFilesystem: false
{{- end }}
{{- end }}
@@ -9,9 +9,10 @@ metadata:
{{- if .Values.service.labels }}
{{ toYaml .Values.service.labels | indent 4 }}
{{- end }}
{{- $root := . }}
{{- with .Values.service.annotations }}
annotations:
{{ toYaml . | indent 4 }}
{{ tpl (toYaml . | indent 4) $root }}
{{- end }}
spec:
{{- if (or (eq .Values.service.type "ClusterIP") (empty .Values.service.type)) }}

@@ -40,12 +41,15 @@ spec:
port: {{ .Values.service.port }}
protocol: TCP
targetPort: {{ .Values.service.targetPort }}
{{ if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort))) }}
{{- if .Values.service.appProtocol }}
appProtocol: {{ .Values.service.appProtocol }}
{{- end }}
{{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort))) }}
nodePort: {{.Values.service.nodePort}}
{{ end }}
{{- if .Values.extraExposePorts }}
{{- tpl (toYaml .Values.extraExposePorts) . | indent 4 }}
{{- end }}
{{ end }}
{{- if .Values.extraExposePorts }}
{{- tpl (toYaml .Values.extraExposePorts) . | nindent 4 }}
{{- end }}
selector:
{{- include "grafana.selectorLabels" . | nindent 4 }}
{{ end }}
@@ -5,7 +5,7 @@ kind: ServiceMonitor
metadata:
name: {{ template "grafana.fullname" . }}
{{- if .Values.serviceMonitor.namespace }}
namespace: {{ .Values.serviceMonitor.namespace }}
namespace: {{ tpl .Values.serviceMonitor.namespace . }}
{{- else }}
namespace: {{ template "grafana.namespace" . }}
{{- end }}
@@ -11,7 +11,7 @@ data:
@test "Test Health" {
url="http://{{ template "grafana.fullname" . }}/api/health"

code=$(wget --server-response --spider --timeout 10 --tries 1 ${url} 2>&1 | awk '/^ HTTP/{print $2}')
code=$(wget --server-response --spider --timeout 90 --tries 10 ${url} 2>&1 | awk '/^ HTTP/{print $2}')
[ "$code" == "200" ]
}
{{- end }}
@@ -1,4 +1,5 @@
{{- if and .Values.testFramework.enabled .Values.rbac.pspEnabled }}
{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:

@@ -27,3 +28,4 @@ spec:
- csi
- secret
{{- end }}
{{- end }}
@@ -136,6 +136,7 @@ downloadDashboards:
  env: {}
  envFromSecret: ""
  resources: {}
  securityContext: {}

## Pod Annotations
# podAnnotations: {}

@@ -158,9 +159,12 @@ service:
  port: 80
  targetPort: 3000
  # targetPort: 4181 To be used with a proxy extraContainer
  ## Service annotations. Can be templated.
  annotations: {}
  labels: {}
  portName: service
  # Adds the appProtocol field to the service. This allows to work with istio protocol selection. Ex: "http" or "tcp"
  appProtocol: ""

serviceMonitor:
  ## If true, a ServiceMonitor CRD is created for a prometheus operator

@@ -249,6 +253,11 @@ tolerations: []
##
affinity: {}

## Topology Spread Constraints
## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/
##
topologySpreadConstraints: []

## Additional init containers (evaluated as template)
## ref: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
##
@@ -468,6 +477,13 @@ extraVolumeMounts: []
#   mountPath: /mnt/volume1
#   readOnly: true
#   hostPath: /usr/shared/
# - name: grafana-secrets
#   csi: true
#   data:
#     driver: secrets-store.csi.k8s.io
#     readOnly: true
#     volumeAttributes:
#       secretProviderClass: "grafana-env-spc"

## Container Lifecycle Hooks. Execute a specific bash command or make an HTTP request
lifecycleHooks: {}
@@ -502,6 +518,71 @@ datasources: {}
#     authType: default
#     defaultRegion: us-east-1

## Configure grafana alerting (can be templated)
## ref: http://docs.grafana.org/administration/provisioning/#alerting
##
alerting: {}
  # rules.yaml: |
  #   apiVersion: 1
  #   groups:
  #     - orgId: 1
  #       name: {{ .Chart.Name }}_my_rule_group
  #       folder: my_first_folder
  #       interval: 60s
  #       rules:
  #         - uid: my_id_1
  #           title: my_first_rule
  #           condition: A
  #           data:
  #             - refId: A
  #               datasourceUid: '-100'
  #               model:
  #                 conditions:
  #                   - evaluator:
  #                       params:
  #                         - 3
  #                       type: gt
  #                     operator:
  #                       type: and
  #                     query:
  #                       params:
  #                         - A
  #                     reducer:
  #                       type: last
  #                     type: query
  #                 datasource:
  #                   type: __expr__
  #                   uid: '-100'
  #                 expression: 1==0
  #                 intervalMs: 1000
  #                 maxDataPoints: 43200
  #                 refId: A
  #                 type: math
  #           dashboardUid: my_dashboard
  #           panelId: 123
  #           noDataState: Alerting
  #           for: 60s
  #           annotations:
  #             some_key: some_value
  #           labels:
  #             team: sre_team_1
  # contactpoints.yaml: |
  #   apiVersion: 1
  #   contactPoints:
  #     - orgId: 1
  #       name: cp_1
  #       receivers:
  #         - uid: first_uid
  #           type: pagerduty
  #           settings:
  #             integrationKey: XXX
  #             severity: critical
  #             class: ping failure
  #             component: Grafana
  #             group: app-stack
  #             summary: |
  #               {{ `{{ template "default.message" . }}` }}

## Configure notifiers
## ref: http://docs.grafana.org/administration/provisioning/#alert-notification-channels
##
@@ -562,6 +643,12 @@ dashboards: {}
#     url: https://example.com/repository/test-b64.json
#     token: ''
#     b64content: true
#   local-dashboard-gitlab:
#     url: https://example.com/repository/test-gitlab.json
#     gitlabToken: ''
#   local-dashboard-bitbucket:
#     url: https://example.com/repository/test-bitbucket.json
#     bearerToken: ''

## Reference to external ConfigMap per provider. Use provider name as key and ConfigMap name as value.
## A provider dashboards must be defined either by external ConfigMaps or in values.yaml, not in both.
@@ -590,6 +677,8 @@ grafana.ini:
    mode: console
  grafana_net:
    url: https://grafana.net
  server:
    domain: "{{ if (and .Values.ingress.enabled .Values.ingress.hosts) }}{{ .Values.ingress.hosts | first }}{{ end }}"
## grafana Authentication can be enabled with the following values on grafana.ini
  # server:
    # The full public facing url you use in browser, used for redirects and emails
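A quick sketch (editor's note, not part of the diff) of what the new `server.domain` template evaluates to, assuming the hypothetical values `ingress.enabled: true` and `ingress.hosts: [grafana.example.com]`; with ingress disabled it renders as an empty string:

```yaml
# rendered grafana.ini fragment (hypothetical host)
server:
  domain: "grafana.example.com"
```
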
@@ -667,15 +756,21 @@ sidecar:
  enableUniqueFilenames: false
  readinessProbe: {}
  livenessProbe: {}
  # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
  logLevel: INFO
  # Log level default for all sidecars. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. Defaults to INFO
  # logLevel: INFO
  dashboards:
    enabled: false
    # Additional environment variables for the dashboards sidecar
    env: {}
    # Do not reprocess already processed unchanged resources on k8s API reconnect.
    # ignoreAlreadyProcessed: true
    SCProvider: true
    # label that the configmaps with dashboards are marked with
    label: grafana_dashboard
    # value of label that the configmaps with dashboards are set to
    labelValue: ""
    # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
    # logLevel: INFO
    # folder in the pod that should hold the collected dashboards (unless `defaultFolderName` is set)
    folder: /tmp/dashboards
    # The default folder name, it will create a subfolder under the `folder` and put dashboards in there instead
@@ -725,10 +820,16 @@ sidecar:
    sizeLimit: {}
  datasources:
    enabled: false
    # Additional environment variables for the datasources sidecar
    env: {}
    # Do not reprocess already processed unchanged resources on k8s API reconnect.
    # ignoreAlreadyProcessed: true
    # label that the configmaps with datasources are marked with
    label: grafana_datasource
    # value of label that the configmaps with datasources are set to
    labelValue: ""
    # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
    # logLevel: INFO
    # If specified, the sidecar will search for datasource config-maps inside this namespace.
    # Otherwise the namespace in which the sidecar is running will be used.
    # It's also possible to specify ALL to search in all namespaces

@@ -737,8 +838,20 @@ sidecar:
    watchMethod: WATCH
    # search in configmap, secret or both
    resource: both
    # watchServerTimeout: request to the server, asking it to cleanly close the connection after that.
    # defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S
    # watchServerTimeout: 3600
    #
    # watchClientTimeout: is a client-side timeout, configuring your local socket.
    # If you have a network outage dropping all packets with no RST/FIN,
    # this is how long your client waits before realizing & dropping the connection.
    # defaults to 66sec (sic!)
    # watchClientTimeout: 60
    #
    # Endpoint to send request to reload datasources
    reloadURL: "http://localhost:3000/api/admin/provisioning/datasources/reload"
    # Absolute path to shell script to execute after a datasource got reloaded
    script: null
    skipReload: false
    # Deploy the datasource sidecar as an initContainer in addition to a container.
    # This is needed if skipReload is true, to load any datasources defined at startup time.
@@ -747,10 +860,16 @@ sidecar:
    sizeLimit: {}
  plugins:
    enabled: false
    # Additional environment variables for the plugins sidecar
    env: {}
    # Do not reprocess already processed unchanged resources on k8s API reconnect.
    # ignoreAlreadyProcessed: true
    # label that the configmaps with plugins are marked with
    label: grafana_plugin
    # value of label that the configmaps with plugins are set to
    labelValue: ""
    # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
    # logLevel: INFO
    # If specified, the sidecar will search for plugin config-maps inside this namespace.
    # Otherwise the namespace in which the sidecar is running will be used.
    # It's also possible to specify ALL to search in all namespaces

@@ -759,8 +878,20 @@ sidecar:
    watchMethod: WATCH
    # search in configmap, secret or both
    resource: both
    # watchServerTimeout: request to the server, asking it to cleanly close the connection after that.
    # defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S
    # watchServerTimeout: 3600
    #
    # watchClientTimeout: is a client-side timeout, configuring your local socket.
    # If you have a network outage dropping all packets with no RST/FIN,
    # this is how long your client waits before realizing & dropping the connection.
    # defaults to 66sec (sic!)
    # watchClientTimeout: 60
    #
    # Endpoint to send request to reload plugins
    reloadURL: "http://localhost:3000/api/admin/provisioning/plugins/reload"
    # Absolute path to shell script to execute after a plugin got reloaded
    script: null
    skipReload: false
    # Deploy the datasource sidecar as an initContainer in addition to a container.
    # This is needed if skipReload is true, to load any plugins defined at startup time.
@@ -769,8 +900,16 @@ sidecar:
    sizeLimit: {}
  notifiers:
    enabled: false
    # Additional environment variables for the notifiers sidecar
    env: {}
    # Do not reprocess already processed unchanged resources on k8s API reconnect.
    # ignoreAlreadyProcessed: true
    # label that the configmaps with notifiers are marked with
    label: grafana_notifier
    # value of label that the configmaps with notifiers are set to
    labelValue: ""
    # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL.
    # logLevel: INFO
    # If specified, the sidecar will search for notifier config-maps inside this namespace.
    # Otherwise the namespace in which the sidecar is running will be used.
    # It's also possible to specify ALL to search in all namespaces
@@ -824,6 +963,8 @@ imageRenderer:
    # image-renderer service port used by both service and deployment
    port: 8081
    targetPort: 8081
    # Adds the appProtocol field to the image-renderer service. This allows to work with istio protocol selection. Ex: "http" or "tcp"
    appProtocol: ""
  # If https is enabled in Grafana, this needs to be set as 'https' to correctly configure the callback used in Grafana
  grafanaProtocol: http
  # In case a sub_path is used this needs to be added to the image renderer callback
@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.5.0
appVersion: 2.6.0
description: Install kube-state-metrics to generate and expose cluster-level metrics
home: https://github.com/kubernetes/kube-state-metrics/
keywords:

@@ -18,4 +18,4 @@ name: kube-state-metrics
sources:
- https://github.com/kubernetes/kube-state-metrics/
type: application
version: 4.15.0
version: 4.18.0
@@ -92,7 +92,9 @@ spec:
{{- if .Values.selfMonitor.telemetryHost }}
- --telemetry-host={{ .Values.selfMonitor.telemetryHost }}
{{- end }}
{{- if .Values.selfMonitor.telemetryPort }}
- --telemetry-port={{ .Values.selfMonitor.telemetryPort | default 8081 }}
{{- end }}
{{- if or (.Values.kubeconfig.enabled) (.Values.volumeMounts) }}
volumeMounts:
{{- if .Values.kubeconfig.enabled }}

@@ -105,7 +107,11 @@ spec:
{{- end }}
{{- end }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- if .Values.image.sha }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}@sha256:{{ .Values.image.sha }}"
{{- else }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
{{- end }}
ports:
- containerPort: {{ .Values.service.port | default 8080}}
name: "http"
@@ -2,7 +2,8 @@
prometheusScrape: true
image:
  repository: registry.k8s.io/kube-state-metrics/kube-state-metrics
  tag: v2.5.0
  tag: v2.6.0
  sha: ""
  pullPolicy: IfNotPresent

imagePullSecrets: []
@@ -15,4 +15,4 @@ name: prometheus-node-exporter
sources:
- https://github.com/prometheus/node_exporter/
type: application
version: 3.3.1
version: 4.2.0
@@ -1,17 +1,17 @@
# Prometheus Node Exporter
# Prometheus `Node Exporter`

Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors.

This chart bootstraps a prometheus [Node Exporter](http://github.com/prometheus/node_exporter) deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
This chart bootstraps a prometheus [`Node Exporter`](http://github.com/prometheus/node_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.

## Get Repo Info
## Get Repository Info

```console
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
```

_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._
_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._

## Install Chart

@@ -19,7 +19,7 @@ _See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._
helm install [RELEASE_NAME] prometheus-community/prometheus-node-exporter
```

_See [configuration](#configuration) below._
_See [configuration](#configuring) below._

_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._
@@ -41,6 +41,17 @@ helm upgrade [RELEASE_NAME] [CHART] --install

_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._

### 3.x to 4.x

Starting from version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade.

```console
kubectl delete daemonset -l app=prometheus-node-exporter
helm upgrade -i prometheus-node-exporter prometheus-community/prometheus-node-exporter
```

If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please ensure to upgrade their `selector` fields accordingly to the new labels.

### From 2.x to 3.x

Change the following:
@@ -9,7 +9,7 @@
export SERVICE_IP=$(kubectl get svc --namespace {{ template "prometheus-node-exporter.namespace" . }} {{ template "prometheus-node-exporter.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ template "prometheus-node-exporter.namespace" . }} -l "app={{ template "prometheus-node-exporter.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export POD_NAME=$(kubectl get pods --namespace {{ template "prometheus-node-exporter.namespace" . }} -l "app.kubernetes.io/name={{ template "prometheus-node-exporter.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
echo "Visit http://127.0.0.1:9100 to use your application"
kubectl port-forward --namespace {{ template "prometheus-node-exporter.namespace" . }} $POD_NAME 9100
{{- end }}
@@ -26,13 +26,28 @@ If release name contains chart name it will be used as a full name.

{{/* Generate basic labels */}}
{{- define "prometheus-node-exporter.labels" }}
app: {{ template "prometheus-node-exporter.name" . }}
heritage: {{.Release.Service }}
release: {{.Release.Name }}
chart: {{ template "prometheus-node-exporter.chart" . }}
helm.sh/chart: {{ template "prometheus-node-exporter.chart" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: {{ template "prometheus-node-exporter.name" . }}
{{- include "prometheus-node-exporter.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
{{- if .Values.podLabels}}
{{ toYaml .Values.podLabels }}
{{- end }}
{{- if .Values.releaseLabel }}
release: {{ .Release.Name }}
{{- end }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "prometheus-node-exporter.selectorLabels" }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/name: {{ template "prometheus-node-exporter.name" . }}
{{- end }}

{{/*

@@ -58,8 +73,12 @@ Create the name of the service account to use
The image to use
*/}}
{{- define "prometheus-node-exporter.image" -}}
{{- if .Values.image.sha -}}
{{- printf "%s:%s@%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.sha }}
{{- else -}}
{{- printf "%s:%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }}
{{- end }}
{{- end }}

{{/*
Allow the release namespace to be overridden for multi-namespace deployments in combined charts
@ -7,8 +7,7 @@ metadata:
spec:
selector:
matchLabels:
app: {{ template "prometheus-node-exporter.name" . }}
release: {{ .Release.Name }}
{{- include "prometheus-node-exporter.selectorLabels" . | indent 6 }}
{{- if .Values.updateStrategy }}
updateStrategy:
{{ toYaml .Values.updateStrategy | indent 4 }}
@ -147,13 +146,21 @@ spec:
{{- end }}
{{- if .Values.sidecars }}
{{ toYaml .Values.sidecars | indent 8 }}
{{- if .Values.sidecarVolumeMount }}
{{- if or .Values.sidecarVolumeMount .Values.sidecarHostVolumeMounts }}
volumeMounts:
{{- range $_, $mount := .Values.sidecarVolumeMount }}
- name: {{ $mount.name }}
mountPath: {{ $mount.mountPath }}
readOnly: {{ $mount.readOnly }}
{{- end }}
{{- range $_, $mount := .Values.sidecarHostVolumeMounts }}
- name: {{ $mount.name }}
mountPath: {{ $mount.mountPath }}
readOnly: {{ $mount.readOnly }}
{{- if $mount.mountPropagation }}
mountPropagation: {{ $mount.mountPropagation }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- if .Values.imagePullSecrets }}
@ -204,6 +211,13 @@ spec:
medium: Memory
{{- end }}
{{- end }}
{{- if .Values.sidecarHostVolumeMounts }}
{{- range $_, $mount := .Values.sidecarHostVolumeMounts }}
- name: {{ $mount.name }}
hostPath:
path: {{ $mount.hostPath }}
{{- end }}
{{- end }}
{{- if .Values.configmaps }}
{{- range $_, $mount := .Values.configmaps }}
- name: {{ $mount.name }}

@ -4,8 +4,7 @@ kind: Endpoints
metadata:
name: {{ template "prometheus-node-exporter.fullname" . }}
namespace: {{ template "prometheus-node-exporter.namespace" . }}
labels:
{{ include "prometheus-node-exporter.labels" . | indent 4 }}
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
subsets:
- addresses:
{{- range .Values.endpoints }}

@ -3,11 +3,11 @@ kind: Service
metadata:
name: {{ template "prometheus-node-exporter.fullname" . }}
namespace: {{ template "prometheus-node-exporter.namespace" . }}
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
{{- if .Values.service.annotations }}
annotations:
{{ toYaml .Values.service.annotations | indent 4 }}
{{- end }}
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
@ -19,5 +19,4 @@ spec:
protocol: TCP
name: {{ .Values.service.portName }}
selector:
app: {{ template "prometheus-node-exporter.name" . }}
release: {{ .Release.Name }}
{{- include "prometheus-node-exporter.selectorLabels" . | indent 4 }}

@ -5,11 +5,7 @@ kind: ServiceAccount
metadata:
name: {{ template "prometheus-node-exporter.serviceAccountName" . }}
namespace: {{ template "prometheus-node-exporter.namespace" . }}
labels:
app: {{ template "prometheus-node-exporter.name" . }}
chart: {{ template "prometheus-node-exporter.chart" . }}
release: "{{ .Release.Name }}"
heritage: "{{ .Release.Service }}"
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
annotations:
{{ toYaml .Values.serviceAccount.annotations | indent 4 }}
imagePullSecrets:

@ -15,8 +15,7 @@ spec:
{{- if .Values.prometheus.monitor.selectorOverride }}
{{ toYaml .Values.prometheus.monitor.selectorOverride | indent 6 }}
{{ else }}
app: {{ template "prometheus-node-exporter.name" . }}
release: {{ .Release.Name }}
{{ include "prometheus-node-exporter.selectorLabels" . | indent 6 }}
{{- end }}
endpoints:
- port: {{ .Values.service.portName }}

@ -6,6 +6,7 @@ image:
# Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }}
tag: ""
pullPolicy: IfNotPresent
sha: ""

imagePullSecrets: []
# - name: "image-pull-secret"
@ -139,6 +140,9 @@ podAnnotations:
# Extra labels to be added to node exporter pods
podLabels: {}

## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box
releaseLabel: false

# Custom DNS configuration to be added to prometheus-node-exporter pods
dnsConfig: {}
# nameservers:
@ -170,7 +174,7 @@ extraArgs: []
# - --collector.diskstats.ignored-devices=^(ram|loop|fd|(h|s|v)d[a-z]|nvme\\d+n\\d+p)\\d+$
# - --collector.textfile.directory=/run/prometheus

## Additional mounts from the host
## Additional mounts from the host to node-exporter container
##
extraHostVolumeMounts: []
# - name: <mountName>
@ -204,6 +208,15 @@ sidecarVolumeMount: []
## mountPath: /run/prometheus
## readOnly: false

## Additional mounts from the host to sidecar containers
##
sidecarHostVolumeMounts: []
# - name: <mountName>
#   hostPath: <hostPath>
#   mountPath: <mountPath>
#   readOnly: true|false
#   mountPropagation: None|HostToContainer|Bidirectional

## Additional InitContainers to initialize the pod
##
extraInitContainers: []

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
@ -981,15 +981,365 @@ spec:
x-kubernetes-map-type: atomic
alertmanagerConfiguration:
description: 'EXPERIMENTAL: alertmanagerConfiguration specifies the
global Alertmanager configuration. If defined, it takes precedence
over the `configSecret` field. This field may change in future releases.'
configuration of Alertmanager. If defined, it takes precedence over
the `configSecret` field. This field may change in future releases.'
properties:
global:
description: Defines the global parameters of the Alertmanager
configuration.
properties:
httpConfig:
description: HTTP client configuration.
properties:
authorization:
description: Authorization header configuration for the
client. This is mutually exclusive with BasicAuth and
is only available starting from Alertmanager v0.22+.
properties:
credentials:
description: The secret's key that contains the credentials
of the request
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined, BasicAuth
takes precedence.
properties:
password:
description: The secret in the service monitor namespace
that contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
username:
description: The secret in the service monitor namespace
that contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
bearerTokenSecret:
description: The secret's key that contains the bearer
token to be used by the client for authentication. The
secret needs to be in the same namespace as the Alertmanager
object and accessible by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its key
must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
followRedirects:
description: FollowRedirects specifies whether the client
should follow HTTP 3xx redirects.
type: boolean
oauth2:
description: OAuth2 client credentials used to fetch a
token for the targets.
properties:
clientId:
description: The secret or configmap containing the
OAuth2 client id
properties:
configMap:
description: ConfigMap containing data to use
for the targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap
or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
secret:
description: Secret containing data to use for
the targets.
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
clientSecret:
description: The secret containing the OAuth2 client
secret
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
endpointParams:
additionalProperties:
type: string
description: Parameters to append to the token URL
type: object
scopes:
description: OAuth2 scopes used for the token request
items:
type: string
type: array
tokenUrl:
description: The URL to fetch the token from
minLength: 1
type: string
required:
- clientId
- clientSecret
- tokenUrl
type: object
proxyURL:
description: Optional proxy URL.
type: string
tlsConfig:
description: TLS configuration for the client.
properties:
ca:
description: Struct containing the CA cert to use
for the targets.
properties:
configMap:
description: ConfigMap containing data to use
for the targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap
or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
secret:
description: Secret containing data to use for
the targets.
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
cert:
description: Struct containing the client cert file
for the targets.
properties:
configMap:
description: ConfigMap containing data to use
for the targets.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap
or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
secret:
description: Secret containing data to use for
the targets.
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keySecret:
description: Secret containing the client key file
for the targets.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string
optional:
description: Specify whether the Secret or its
key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
type: object
resolveTimeout:
description: ResolveTimeout is the default value used by alertmanager
if the alert does not include EndsAt, after this time passes
it can declare the alert as resolved if it has not been
updated. This has no impact on alerts from Prometheus, as
they always include EndsAt.
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
type: string
type: object
name:
description: The name of the AlertmanagerConfig resource which
is used to generate the global configuration. It must be defined
in the same namespace as the Alertmanager object. The operator
will not enforce a `namespace` label for routes and inhibition
rules.
is used to generate the Alertmanager configuration. It must
be defined in the same namespace as the Alertmanager object.
The operator will not enforce a `namespace` label for routes
and inhibition rules.
minLength: 1
type: string
type: object
@ -1595,13 +1945,13 @@ spec:
Cannot be updated.
type: string
ports:
description: List of ports to expose from the container. Exposing
a port here gives the system additional information about
the network connections a container uses, but is primarily
informational. Not specifying a port here DOES NOT prevent
that port from being exposed. Any port which is listening
on the default "0.0.0.0" address inside a container will be
accessible from the network. Cannot be updated.
description: List of ports to expose from the container. Not
specifying a port here DOES NOT prevent that port from being
exposed. Any port which is listening on the default "0.0.0.0"
address inside a container will be accessible from the network.
Modifying this array with strategic merge patch may corrupt
the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
Cannot be updated.
items:
description: ContainerPort represents a network port in a
single container.
@ -2880,13 +3230,13 @@ spec:
Cannot be updated.
type: string
ports:
description: List of ports to expose from the container. Exposing
a port here gives the system additional information about
the network connections a container uses, but is primarily
informational. Not specifying a port here DOES NOT prevent
that port from being exposed. Any port which is listening
on the default "0.0.0.0" address inside a container will be
accessible from the network. Cannot be updated.
description: List of ports to expose from the container. Not
specifying a port here DOES NOT prevent that port from being
exposed. Any port which is listening on the default "0.0.0.0"
address inside a container will be accessible from the network.
Modifying this array with strategic merge patch may corrupt
the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
Cannot be updated.
items:
description: ContainerPort represents a network port in a
single container.
@ -4509,6 +4859,19 @@ spec:
type: object
type: object
x-kubernetes-map-type: atomic
matchLabelKeys:
description: MatchLabelKeys is a set of pod label keys to select
the pods over which spreading will be calculated. The keys
are used to look up values from the incoming pod labels, those
key-value labels are ANDed with labelSelector to select the
group of existing pods over which spreading will be calculated
for the incoming pod. Keys that don't exist in the incoming
pod labels will be ignored. A null or empty list means only
match against labelSelector.
items:
type: string
type: array
x-kubernetes-list-type: atomic
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`,
@ -4549,11 +4912,31 @@ spec:
minimum\" is treated as 0. In this situation, new pod with
the same labelSelector cannot be scheduled, because computed
skew will be 3(3 - 0) if new Pod is scheduled to any of the
three zones, it will violate MaxSkew. \n This is an alpha
field and requires enabling MinDomainsInPodTopologySpread
feature gate."
three zones, it will violate MaxSkew. \n This is a beta field
and requires the MinDomainsInPodTopologySpread feature gate
to be enabled (enabled by default)."
format: int32
type: integer
nodeAffinityPolicy:
description: "NodeAffinityPolicy indicates how we will treat
Pod's nodeAffinity/nodeSelector when calculating pod topology
spread skew. Options are: - Honor: only nodes matching nodeAffinity/nodeSelector
are included in the calculations. - Ignore: nodeAffinity/nodeSelector
are ignored. All nodes are included in the calculations. \n
If this value is nil, the behavior is equivalent to the Honor
policy. This is an alpha-level feature enabled by the NodeInclusionPolicyInPodTopologySpread
feature flag."
type: string
nodeTaintsPolicy:
description: "NodeTaintsPolicy indicates how we will treat node
taints when calculating pod topology spread skew. Options
are: - Honor: nodes without taints, along with tainted nodes
for which the incoming pod has a toleration, are included.
- Ignore: node taints are ignored. All nodes are included.
\n If this value is nil, the behavior is equivalent to the
Ignore policy. This is an alpha-level feature enabled by the
NodeInclusionPolicyInPodTopologySpread feature flag."
type: string
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
@ -4561,10 +4944,11 @@ spec:
as a "bucket", and try to put balanced number of pods into
each bucket. We define a domain as a particular instance of
a topology. Also, we define an eligible domain as a domain
whose nodes match the node selector. e.g. If TopologyKey is
"kubernetes.io/hostname", each Node is a domain of that topology.
And, if TopologyKey is "topology.kubernetes.io/zone", each
zone is a domain of that topology. It's a required field.
whose nodes meet the requirements of nodeAffinityPolicy and
nodeTaintsPolicy. e.g. If TopologyKey is "kubernetes.io/hostname",
each Node is a domain of that topology. And, if TopologyKey
is "topology.kubernetes.io/zone", each zone is a domain of
that topology. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
@ -6153,8 +6537,55 @@ spec:
web:
description: Defines the web command line flags when starting Alertmanager.
properties:
httpConfig:
description: Defines HTTP parameters for web server.
properties:
headers:
description: List of headers that can be added to HTTP responses.
properties:
contentSecurityPolicy:
description: Set the Content-Security-Policy header to
HTTP responses. Unset if blank.
type: string
strictTransportSecurity:
description: Set the Strict-Transport-Security header
to HTTP responses. Unset if blank. Please make sure
that you use this with care as this header might force
browsers to load Prometheus and the other applications
hosted on the same domain and subdomains over HTTPS.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security
type: string
xContentTypeOptions:
description: Set the X-Content-Type-Options header to
HTTP responses. Unset if blank. Accepted value is nosniff.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options
enum:
- ""
- NoSniff
type: string
xFrameOptions:
description: Set the X-Frame-Options header to HTTP responses.
Unset if blank. Accepted values are deny and sameorigin.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
enum:
- ""
- Deny
- SameOrigin
type: string
xXSSProtection:
description: Set the X-XSS-Protection header to all responses.
Unset if blank. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection
type: string
type: object
http2:
description: Enable HTTP/2 support. Note that HTTP/2 is only
supported with TLS. When TLSConfig is not configured, HTTP/2
will be disabled. Whenever the value of the field changes,
a rolling update will be triggered.
type: boolean
type: object
tlsConfig:
description: WebTLSConfig defines the TLS parameters for HTTPS.
description: Defines the TLS parameters for HTTPS.
properties:
cert:
description: Contains the TLS certificate for the server.

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
@ -110,6 +110,31 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
additionalArgs:
description: AdditionalArgs allows setting additional arguments for
the Prometheus container. It is intended for e.g. activating hidden
flags which are not supported by the dedicated configuration options
yet. The arguments are passed as-is to the Prometheus container
which may cause issues if they are invalid or not supported by
the given Prometheus version. In case of an argument conflict (e.g.
an argument which is already set by the operator itself) or when
providing an invalid argument the reconciliation will fail and an
error will be logged.
items:
description: Argument as part of the AdditionalArgs list.
properties:
name:
description: Name of the argument, e.g. "scrape.discovery-reload-interval".
minLength: 1
type: string
value:
description: Argument value, e.g. 30s. Can be empty for name-only
arguments (e.g. --storage.tsdb.no-lockfile)
type: string
required:
- name
type: object
type: array
additionalScrapeConfigs:
description: 'AdditionalScrapeConfigs allows specifying a key of a
Secret containing additional Prometheus scrape configurations. Scrape
@ -2004,13 +2029,13 @@ spec:
Cannot be updated.
type: string
ports:
description: List of ports to expose from the container. Exposing
a port here gives the system additional information about
the network connections a container uses, but is primarily
informational. Not specifying a port here DOES NOT prevent
that port from being exposed. Any port which is listening
on the default "0.0.0.0" address inside a container will be
accessible from the network. Cannot be updated.
description: List of ports to expose from the container. Not
specifying a port here DOES NOT prevent that port from being
exposed. Any port which is listening on the default "0.0.0.0"
address inside a container will be accessible from the network.
Modifying this array with strategic merge patch may corrupt
the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
Cannot be updated.
items:
description: ContainerPort represents a network port in a
single container.
@ -3440,13 +3465,13 @@ spec:
Cannot be updated.
type: string
ports:
description: List of ports to expose from the container. Exposing
a port here gives the system additional information about
the network connections a container uses, but is primarily
informational. Not specifying a port here DOES NOT prevent
that port from being exposed. Any port which is listening
on the default "0.0.0.0" address inside a container will be
accessible from the network. Cannot be updated.
description: List of ports to expose from the container. Not
specifying a port here DOES NOT prevent that port from being
exposed. Any port which is listening on the default "0.0.0.0"
address inside a container will be accessible from the network.
Modifying this array with strategic merge patch may corrupt
the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
Cannot be updated.
items:
description: ContainerPort represents a network port in a
single container.
@ -6347,6 +6372,29 @@ spec:
notice in any release. \n This is experimental and may change significantly
without backward compatibility in any release."
properties:
additionalArgs:
description: AdditionalArgs allows setting additional arguments
for the Thanos container. The arguments are passed as-is to
the Thanos container which may cause issues if they are invalid
or not supported by the given Thanos version. In case of an argument
conflict (e.g. an argument which is already set by the operator
itself) or when providing an invalid argument the reconciliation
will fail and an error will be logged.
items:
description: Argument as part of the AdditionalArgs list.
properties:
name:
description: Name of the argument, e.g. "scrape.discovery-reload-interval".
minLength: 1
type: string
value:
description: Argument value, e.g. 30s. Can be empty for
name-only arguments (e.g. --storage.tsdb.no-lockfile)
type: string
required:
- name
type: object
type: array
baseImage:
description: 'Thanos base image if other than default. Deprecated:
use ''image'' instead'
@ -6756,6 +6804,19 @@ spec:
type: object
type: object
x-kubernetes-map-type: atomic
matchLabelKeys:
description: MatchLabelKeys is a set of pod label keys to select
the pods over which spreading will be calculated. The keys
are used to look up values from the incoming pod labels, those
key-value labels are ANDed with labelSelector to select the
group of existing pods over which spreading will be calculated
for the incoming pod. Keys that don't exist in the incoming
pod labels will be ignored. A null or empty list means only
match against labelSelector.
items:
type: string
type: array
x-kubernetes-list-type: atomic
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`,
@ -6796,11 +6857,31 @@ spec:
minimum\" is treated as 0. In this situation, new pod with
the same labelSelector cannot be scheduled, because computed
skew will be 3(3 - 0) if new Pod is scheduled to any of the
three zones, it will violate MaxSkew. \n This is an alpha
field and requires enabling MinDomainsInPodTopologySpread
feature gate."
three zones, it will violate MaxSkew. \n This is a beta field
and requires the MinDomainsInPodTopologySpread feature gate
to be enabled (enabled by default)."
format: int32
type: integer
nodeAffinityPolicy:
description: "NodeAffinityPolicy indicates how we will treat
Pod's nodeAffinity/nodeSelector when calculating pod topology
spread skew. Options are: - Honor: only nodes matching nodeAffinity/nodeSelector
are included in the calculations. - Ignore: nodeAffinity/nodeSelector
are ignored. All nodes are included in the calculations. \n
If this value is nil, the behavior is equivalent to the Honor
policy. This is an alpha-level feature enabled by the NodeInclusionPolicyInPodTopologySpread
feature flag."
type: string
nodeTaintsPolicy:
description: "NodeTaintsPolicy indicates how we will treat node
taints when calculating pod topology spread skew. Options
are: - Honor: nodes without taints, along with tainted nodes
for which the incoming pod has a toleration, are included.
- Ignore: node taints are ignored. All nodes are included.
\n If this value is nil, the behavior is equivalent to the
Ignore policy. This is an alpha-level feature enabled by the
NodeInclusionPolicyInPodTopologySpread feature flag."
type: string
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
@ -6808,10 +6889,11 @@ spec:
as a "bucket", and try to put balanced number of pods into
each bucket. We define a domain as a particular instance of
a topology. Also, we define an eligible domain as a domain
whose nodes match the node selector. e.g. If TopologyKey is
"kubernetes.io/hostname", each Node is a domain of that topology.
And, if TopologyKey is "topology.kubernetes.io/zone", each
zone is a domain of that topology. It's a required field.
whose nodes meet the requirements of nodeAffinityPolicy and
nodeTaintsPolicy. e.g. If TopologyKey is "kubernetes.io/hostname",
each Node is a domain of that topology. And, if TopologyKey
is "topology.kubernetes.io/zone", each zone is a domain of
that topology. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
@ -8404,11 +8486,58 @@ spec:
web:
description: Defines the web command line flags when starting Prometheus.
properties:
httpConfig:
description: Defines HTTP parameters for web server.
properties:
headers:
description: List of headers that can be added to HTTP responses.
properties:
contentSecurityPolicy:
description: Set the Content-Security-Policy header to
HTTP responses. Unset if blank.
type: string
strictTransportSecurity:
description: Set the Strict-Transport-Security header
to HTTP responses. Unset if blank. Please make sure
that you use this with care as this header might force
browsers to load Prometheus and the other applications
hosted on the same domain and subdomains over HTTPS.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security
type: string
xContentTypeOptions:
description: Set the X-Content-Type-Options header to
HTTP responses. Unset if blank. Accepted value is nosniff.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options
enum:
- ""
- NoSniff
type: string
xFrameOptions:
description: Set the X-Frame-Options header to HTTP responses.
Unset if blank. Accepted values are deny and sameorigin.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
enum:
- ""
- Deny
- SameOrigin
type: string
xXSSProtection:
description: Set the X-XSS-Protection header to all responses.
Unset if blank. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection
type: string
type: object
http2:
description: Enable HTTP/2 support. Note that HTTP/2 is only
supported with TLS. When TLSConfig is not configured, HTTP/2
will be disabled. Whenever the value of the field changes,
a rolling update will be triggered.
type: boolean
type: object
pageTitle:
description: The prometheus web page title
type: string
tlsConfig:
description: WebTLSConfig defines the TLS parameters for HTTPS.
description: Defines the TLS parameters for HTTPS.
properties:
cert:
description: Contains the TLS certificate for the server.

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition

@ -1,4 +1,4 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
@ -1508,13 +1508,13 @@ spec:
Cannot be updated.
type: string
ports:
description: List of ports to expose from the container. Exposing
a port here gives the system additional information about
the network connections a container uses, but is primarily
informational. Not specifying a port here DOES NOT prevent
that port from being exposed. Any port which is listening
on the default "0.0.0.0" address inside a container will be
accessible from the network. Cannot be updated.
description: List of ports to expose from the container. Not
specifying a port here DOES NOT prevent that port from being
exposed. Any port which is listening on the default "0.0.0.0"
address inside a container will be accessible from the network.
Modifying this array with strategic merge patch may corrupt
the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
Cannot be updated.
items:
description: ContainerPort represents a network port in a
single container.
@ -2955,13 +2955,13 @@ spec:
Cannot be updated.
type: string
ports:
description: List of ports to expose from the container. Exposing
a port here gives the system additional information about
the network connections a container uses, but is primarily
informational. Not specifying a port here DOES NOT prevent
that port from being exposed. Any port which is listening
on the default "0.0.0.0" address inside a container will be
accessible from the network. Cannot be updated.
description: List of ports to expose from the container. Not
specifying a port here DOES NOT prevent that port from being
exposed. Any port which is listening on the default "0.0.0.0"
address inside a container will be accessible from the network.
Modifying this array with strategic merge patch may corrupt
the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255.
Cannot be updated.
items:
description: ContainerPort represents a network port in a
single container.
@ -4731,6 +4731,19 @@ spec:
type: object
type: object
x-kubernetes-map-type: atomic
matchLabelKeys:
description: MatchLabelKeys is a set of pod label keys to select
the pods over which spreading will be calculated. The keys
are used to look up values from the incoming pod labels, those
key-value labels are ANDed with labelSelector to select the
group of existing pods over which spreading will be calculated
for the incoming pod. Keys that don't exist in the incoming
pod labels will be ignored. A null or empty list means only
match against labelSelector.
items:
type: string
type: array
x-kubernetes-list-type: atomic
maxSkew:
description: 'MaxSkew describes the degree to which pods may
be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`,
@ -4771,11 +4784,31 @@ spec:
minimum\" is treated as 0. In this situation, new pod with
the same labelSelector cannot be scheduled, because computed
skew will be 3(3 - 0) if new Pod is scheduled to any of the
three zones, it will violate MaxSkew. \n This is an alpha
field and requires enabling MinDomainsInPodTopologySpread
feature gate."
three zones, it will violate MaxSkew. \n This is a beta field
and requires the MinDomainsInPodTopologySpread feature gate
to be enabled (enabled by default)."
format: int32
type: integer
nodeAffinityPolicy:
description: "NodeAffinityPolicy indicates how we will treat
Pod's nodeAffinity/nodeSelector when calculating pod topology
spread skew. Options are: - Honor: only nodes matching nodeAffinity/nodeSelector
are included in the calculations. - Ignore: nodeAffinity/nodeSelector
are ignored. All nodes are included in the calculations. \n
If this value is nil, the behavior is equivalent to the Honor
policy. This is an alpha-level feature enabled by the NodeInclusionPolicyInPodTopologySpread
feature flag."
type: string
nodeTaintsPolicy:
description: "NodeTaintsPolicy indicates how we will treat node
taints when calculating pod topology spread skew. Options
are: - Honor: nodes without taints, along with tainted nodes
for which the incoming pod has a toleration, are included.
- Ignore: node taints are ignored. All nodes are included.
\n If this value is nil, the behavior is equivalent to the
Ignore policy. This is an alpha-level feature enabled by the
NodeInclusionPolicyInPodTopologySpread feature flag."
type: string
topologyKey:
description: TopologyKey is the key of node labels. Nodes that
have a label with this key and identical values are considered
@ -4783,10 +4816,11 @@ spec:
as a "bucket", and try to put balanced number of pods into
each bucket. We define a domain as a particular instance of
a topology. Also, we define an eligible domain as a domain
whose nodes match the node selector. e.g. If TopologyKey is
"kubernetes.io/hostname", each Node is a domain of that topology.
And, if TopologyKey is "topology.kubernetes.io/zone", each
zone is a domain of that topology. It's a required field.
whose nodes meet the requirements of nodeAffinityPolicy and
nodeTaintsPolicy. e.g. If TopologyKey is "kubernetes.io/hostname",
each Node is a domain of that topology. And, if TopologyKey
is "topology.kubernetes.io/zone", each zone is a domain of
that topology. It's a required field.
type: string
whenUnsatisfiable:
description: 'WhenUnsatisfiable indicates how to deal with a
@ -4832,6 +4866,11 @@ spec:
- key
type: object
x-kubernetes-map-type: atomic
tracingConfigFile:
description: TracingConfig specifies the path of the tracing configuration
file. When used alongside TracingConfig, TracingConfigFile
takes precedence.
type: string
volumes:
description: Volumes allows configuration of additional volumes on
the output StatefulSet definition. Volumes specified will be appended

@ -42,6 +42,10 @@ spec:
- --host={{ template "kube-prometheus-stack.operator.fullname" . }},{{ template "kube-prometheus-stack.operator.fullname" . }}.{{ template "kube-prometheus-stack.namespace" . }}.svc
- --namespace={{ template "kube-prometheus-stack.namespace" . }}
- --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission
{{- with .Values.prometheusOperator.admissionWebhooks.createSecretJob }}
securityContext:
{{ toYaml .securityContext | nindent 12 }}
{{- end }}
resources:
{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }}
restartPolicy: OnFailure

@ -43,6 +43,10 @@ spec:
- --namespace={{ template "kube-prometheus-stack.namespace" . }}
- --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission
- --patch-failure-policy={{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }}
{{- with .Values.prometheusOperator.admissionWebhooks.patchWebhookJob }}
securityContext:
{{ toYaml .securityContext | nindent 12 }}
{{- end }}
resources:
{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }}
restartPolicy: OnFailure

@ -12,5 +12,5 @@ metadata:
app: {{ template "kube-prometheus-stack.name" . }}-prometheus-am-confg
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
data:
additional-alertmanager-configs.yaml: {{ toYaml .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs | b64enc | quote }}
additional-alertmanager-configs.yaml: {{ tpl (toYaml .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs) . | b64enc | quote }}
{{- end }}

@ -112,8 +112,10 @@ spec:
{{- if .Values.prometheus.prometheusSpec.retentionSize }}
retentionSize: {{ .Values.prometheus.prometheusSpec.retentionSize | quote }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.walCompression }}
walCompression: {{ .Values.prometheus.prometheusSpec.walCompression }}
{{- if eq .Values.prometheus.prometheusSpec.walCompression false }}
walCompression: false
{{ else }}
walCompression: true
{{- end }}
{{- if .Values.prometheus.prometheusSpec.routePrefix }}
routePrefix: {{ .Values.prometheus.prometheusSpec.routePrefix | quote }}
@ -354,7 +356,7 @@ spec:
name: "{{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) . | trunc 63 | trimSuffix "-" }}"
{{- end }}
{{- if .Values.prometheus.prometheusSpec.excludedFromEnforcement }}
{{ toYaml .Values.prometheus.prometheusSpec.excludedFromEnforcement | indent 4 }}
{{ tpl (toYaml .Values.prometheus.prometheusSpec.excludedFromEnforcement | indent 4) . }}
{{- end }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.queryLogFile }}

@ -1530,6 +1530,7 @@ prometheus-node-exporter:
## Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards
##
jobLabel: node-exporter
releaseLabel: true
extraArgs:
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
@ -1610,7 +1611,7 @@ prometheusOperator:
enabled: true
image:
repository: k8s.gcr.io/ingress-nginx/kube-webhook-certgen
tag: v1.2.0
tag: v1.3.0
sha: ""
pullPolicy: IfNotPresent
resources: {}
@ -1631,6 +1632,14 @@ prometheusOperator:
runAsNonRoot: true
runAsUser: 2000

# Security context for create job container
createSecretJob:
securityContext: {}

# Security context for patch job container
patchWebhookJob:
securityContext: {}

# Use certmanager to generate webhook certs
certManager:
enabled: false
@ -1841,7 +1850,7 @@ prometheusOperator:
##
image:
repository: quay.io/prometheus-operator/prometheus-operator
tag: v0.58.0
tag: v0.59.1
sha: ""
pullPolicy: IfNotPresent

@ -1856,10 +1865,9 @@ prometheusOperator:
## Prometheus-config-reloader
##
prometheusConfigReloader:
# image to use for config and rule reloading
image:
repository: quay.io/prometheus-operator/prometheus-config-reloader
tag: v0.58.0
tag: v0.59.1
sha: ""

# resource config for prometheusConfigReloader
@ -1875,7 +1883,7 @@ prometheusOperator:
##
thanosImage:
repository: quay.io/thanos/thanos
tag: v0.27.0
tag: v0.28.0
sha: ""

## Set a Field Selector to filter watched secrets
@ -2308,7 +2316,7 @@ prometheus:
##
image:
repository: quay.io/prometheus/prometheus
tag: v2.37.0
tag: v2.38.0
sha: ""

## Tolerations for use with node taints
@ -2500,7 +2508,7 @@ prometheus:

## Enable compression of the write-ahead log using Snappy.
##
walCompression: false
walCompression: true

## If true, the Operator won't process any Prometheus configuration changes
##
@ -3139,7 +3147,7 @@ thanosRuler:
##
image:
repository: quay.io/thanos/thanos
tag: v0.27.0
tag: v0.28.0
sha: ""

## Namespaces to be selected for PrometheusRules discovery.

@ -56,7 +56,11 @@
|
||||
},
|
||||
"gridPos": { },
|
||||
"id": 3,
|
||||
"interval": null,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"rightSide": true
|
||||
},
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
@ -128,13 +132,14 @@
|
||||
"fillGradient": 0,
|
||||
"gridPos": { },
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
@ -236,7 +241,11 @@
|
||||
},
|
||||
"gridPos": { },
|
||||
"id": 5,
|
||||
"interval": null,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"rightSide": true
|
||||
},
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
@ -307,13 +316,14 @@
|
||||
"fillGradient": 0,
|
||||
"gridPos": { },
|
||||
"id": 6,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
@ -406,13 +416,14 @@
|
||||
"fillGradient": 0,
|
||||
"gridPos": { },
|
||||
"id": 7,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
@ -488,13 +499,14 @@
|
||||
"fillGradient": 0,
|
||||
"gridPos": { },
|
||||
"id": 8,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
@ -516,7 +528,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
|
||||
"expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ resource }}",
|
||||
@@ -594,7 +606,11 @@
},
"gridPos": { },
"id": 9,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@@ -665,13 +681,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -764,13 +781,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -846,13 +864,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -874,7 +893,7 @@
"steppedLine": false,
"targets": [
{
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
"expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@@ -940,13 +959,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": false,
"sideWidth": null,
"total": false,
@@ -968,7 +988,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
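The workqueue panels above and below also move from a hard-coded [5m] range to Grafana's $__rate_interval variable, which grows with the panel step and scrape interval so rate() always spans at least a few samples. A before/after sketch using the metric from the hunk above:

    rate(workqueue_adds_total{job="apiserver"}[5m])                # fixed 5m window
    rate(workqueue_adds_total{job="apiserver"}[$__rate_interval])  # window follows panel resolution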
@@ -1021,13 +1041,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": false,
"sideWidth": null,
"total": false,
@@ -1049,7 +1070,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@@ -1102,6 +1123,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -1130,7 +1152,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))",
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@@ -1196,13 +1218,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1277,13 +1300,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1305,7 +1329,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])",
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -1358,13 +1382,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1452,7 +1477,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@@ -1470,7 +1495,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(apiserver_request_total, cluster)",
"query": "label_values(up{job=\"apiserver\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -1490,7 +1515,7 @@
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
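The two template-variable hunks above populate $cluster and $instance from the lightweight up series instead of apiserver_request_total, which carries far more label combinations and is therefore much slower to enumerate. The pattern, as used above:

    label_values(up{job="apiserver"}, cluster)                        # $cluster candidates
    label_values(up{job="apiserver", cluster="$cluster"}, instance)   # $instance candidates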

@@ -1619,7 +1619,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",

@@ -36,7 +36,11 @@
},
"gridPos": { },
"id": 2,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@@ -106,6 +110,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -134,7 +139,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@@ -200,6 +205,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -228,7 +234,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@@ -294,6 +300,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -322,7 +329,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@@ -388,13 +395,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -416,28 +424,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@@ -490,13 +498,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -518,7 +527,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -584,6 +593,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -612,7 +622,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -678,13 +688,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -759,13 +770,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -787,7 +799,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -840,13 +852,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -934,7 +947,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",

@@ -24,10 +24,12 @@
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -47,7 +49,7 @@
"steppedLine": false,
"targets": [
{
"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
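The cluster CPU utilisation stat above now reads the precomputed cluster:node_cpu:ratio_rate5m recording rule instead of averaging idle CPU inline on every dashboard refresh. Assuming the rule mirrors the expression it replaces (a sketch; the rule definition itself is not part of this diff), it is roughly:

    # illustrative recording-rule shape
    record: cluster:node_cpu:ratio_rate5m
    expr: 1 - avg by (cluster) (rate(node_cpu_seconds_total{mode="idle"}[5m]))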
@@ -99,11 +101,14 @@
"fill": 1,
"format": "percentunit",
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -123,7 +128,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
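From here on, kube-state-metrics series such as kube_node_status_allocatable are pinned with an explicit job selector, so a second target exporting a metric of the same name cannot double the denominator of these ratios. The guarded pattern from the hunk above:

    sum(kube_node_status_allocatable{job="kube-state-metrics", resource="cpu", cluster="$cluster"})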
@@ -175,11 +180,14 @@
"fill": 1,
"format": "percentunit",
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -199,7 +207,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -251,11 +259,14 @@
"fill": 1,
"format": "percentunit",
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -275,7 +286,7 @@
"steppedLine": false,
"targets": [
{
"expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{cluster=\"$cluster\"})",
"expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -327,11 +338,14 @@
"fill": 1,
"format": "percentunit",
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -351,7 +365,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -403,11 +417,14 @@
"fill": 1,
"format": "percentunit",
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -427,7 +444,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -490,11 +507,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -578,11 +598,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -741,7 +764,7 @@
],
"targets": [
{
"expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -861,11 +884,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -885,7 +911,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
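The same pinning is applied to every cAdvisor series in this dashboard: container_memory_rss and the container_network_* metrics below gain job="cadvisor", so only kubelet/cAdvisor scrape targets feed the panels. Pattern sketch from the hunk above:

    sum(container_memory_rss{job="cadvisor", cluster="$cluster", container!=""}) by (namespace)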
@@ -949,11 +975,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1112,7 +1141,7 @@
],
"targets": [
{
"expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1130,7 +1159,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1148,7 +1177,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1166,7 +1195,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1234,10 +1263,12 @@
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1381,7 +1412,7 @@
],
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1390,7 +1421,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1399,7 +1430,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1408,7 +1439,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1417,7 +1448,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1426,7 +1457,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1492,11 +1523,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1516,7 +1550,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -1568,11 +1602,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1592,7 +1629,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -1656,11 +1693,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1680,7 +1720,7 @@
"steppedLine": false,
"targets": [
{
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -1732,11 +1772,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1756,7 +1799,7 @@
"steppedLine": false,
"targets": [
{
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -1820,11 +1863,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1844,7 +1890,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -1896,11 +1942,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1920,7 +1969,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -1984,11 +2033,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -2008,7 +2060,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -2060,11 +2112,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 19,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -2084,7 +2139,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -2149,11 +2204,14 @@
"decimals": -1,
"fill": 10,
"id": 20,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -2173,7 +2231,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m])))",
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
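The disk IO panels above and below additionally restrict container_fs_* metrics to real block devices and non-empty namespaces, so loop and overlay filesystems no longer inflate IOPS and throughput; the regex covers mmcblk, nvme, rbd, sd, vd, xvd, dm- and dasd devices. Filter sketch from the hunk above:

    rate(container_fs_reads_total{job="cadvisor", container!="", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", namespace!=""}[$__rate_interval])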
@@ -2225,11 +2283,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 21,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -2249,7 +2310,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -2313,11 +2374,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 22,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -2465,7 +2529,7 @@
],
"targets": [
{
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2474,7 +2538,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2483,7 +2547,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2492,7 +2556,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2501,7 +2565,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2510,7 +2574,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,

@@ -22,11 +22,14 @@
"fill": 1,
"format": "percentunit",
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -46,7 +49,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -98,11 +101,14 @@
"fill": 1,
"format": "percentunit",
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -122,7 +128,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -174,11 +180,14 @@
"fill": 1,
"format": "percentunit",
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -198,7 +207,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -250,11 +259,14 @@
"fill": 1,
"format": "percentunit",
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -274,7 +286,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -337,11 +349,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -464,11 +479,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -699,11 +717,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -746,7 +767,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -826,11 +847,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1004,7 +1028,7 @@
],
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1022,7 +1046,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1040,7 +1064,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1049,7 +1073,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1058,7 +1082,7 @@
"step": 10
},
{
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1067,7 +1091,7 @@
"step": 10
},
{
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1135,10 +1159,12 @@
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1282,7 +1308,7 @@
],
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1291,7 +1317,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1300,7 +1326,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1309,7 +1335,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1318,7 +1344,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1327,7 +1353,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1393,11 +1419,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1469,11 +1498,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1557,11 +1589,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1633,11 +1668,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1721,11 +1759,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1797,11 +1838,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1886,11 +1930,14 @@
"decimals": -1,
"fill": 10,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1910,7 +1957,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m])))",
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -1962,11 +2009,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -1986,7 +2036,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -2050,11 +2100,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@@ -2202,7 +2255,7 @@
],
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2211,7 +2264,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2220,7 +2273,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2229,7 +2282,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2238,7 +2291,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2247,7 +2300,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2336,7 +2389,7 @@
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
@ -2359,7 +2412,7 @@
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
|
||||
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
|
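Every disk I/O query change in the hunks above follows the same upstream pattern: the counters gain a device filter so only physical block devices are counted, the fixed [5m] window becomes Grafana's $__rate_interval, and the instant table queries additionally pin job="cadvisor". A minimal before/after sketch of that pattern in PromQL (selectors shortened here; the full label sets are in the diff above):

# before: all devices cAdvisor reports (can include overlay/tmpfs), fixed 5m window
sum by (pod) (rate(container_fs_reads_total{container!=""}[5m]))

# after: physical block devices only, window follows the dashboard resolution
sum by (pod) (rate(container_fs_reads_total{job="cadvisor", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", container!=""}[$__rate_interval]))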
@ -21,11 +21,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -38,12 +41,32 @@
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"seriesOverrides": [
{
"alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
"hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
"stack": false
}
],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series",
@ -109,11 +132,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -344,11 +370,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -361,12 +390,32 @@
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"seriesOverrides": [
{
"alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
"hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
"stack": false
}
],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"format": "time_series",
@ -432,11 +481,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -762,7 +814,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -785,7 +837,7 @@
"multi": true,
"name": "node",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)",
"query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)",
"refresh": 2,
"regex": "",
"sort": 1,
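The node dashboard hunks above add a dashed "max capacity" reference line to the CPU and memory usage panels: a new target reads node capacity from kube-state-metrics, and a matching seriesOverride styles it red, dashed, and unstacked. The added capacity targets are, in PromQL:

# CPU capacity of the selected node(s)
sum(kube_node_status_capacity{cluster="$cluster", node=~"$node", resource="cpu"})

# memory capacity of the selected node(s)
sum(kube_node_status_capacity{cluster="$cluster", node=~"$node", resource="memory"})

The node template variable also switches from kube_pod_info to label_values(kube_node_info{cluster="$cluster"}, node), which presumably keeps nodes selectable even when they are running no pods.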
@ -21,11 +21,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -72,7 +75,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
@ -80,7 +83,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
@ -144,11 +147,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -168,7 +174,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)",
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -241,11 +247,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -476,11 +485,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -521,7 +533,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -529,7 +541,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
@ -537,7 +549,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
@ -601,11 +613,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -779,7 +794,7 @@
],
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -797,7 +812,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -815,7 +830,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -824,7 +839,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -833,7 +848,7 @@
"step": 10
},
{
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -842,7 +857,7 @@
"step": 10
},
{
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -910,10 +925,12 @@
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -933,7 +950,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -987,10 +1004,12 @@
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1010,7 +1029,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1076,10 +1095,12 @@
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1099,7 +1120,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1153,10 +1174,12 @@
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1176,7 +1199,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1242,10 +1265,12 @@
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1265,7 +1290,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1319,10 +1344,12 @@
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1342,7 +1369,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1407,11 +1434,14 @@
"decimals": -1,
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1431,7 +1461,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m])))",
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
@ -1439,7 +1469,7 @@
"step": 10
},
{
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m])))",
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
@ -1491,11 +1521,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1515,7 +1548,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
@ -1523,7 +1556,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
@ -1588,11 +1621,14 @@
"decimals": -1,
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1612,7 +1648,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m])))",
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -1664,11 +1700,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1688,7 +1727,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -1752,11 +1791,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1904,7 +1946,7 @@
],
"targets": [
{
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1913,7 +1955,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_writes_total{job=\"cadvisor\",device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1922,7 +1964,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1931,7 +1973,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1940,7 +1982,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1949,7 +1991,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2038,7 +2080,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -2061,7 +2103,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -2084,7 +2126,7 @@
"multi": false,
"name": "pod",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
"refresh": 2,
"regex": "",
"sort": 1,
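The pod dashboard above applies the same two conventions throughout: cAdvisor-sourced series (container_*) are pinned to job="cadvisor" and kube-state-metrics series (kube_*) to job="kube-state-metrics", presumably so panels do not double count when another scrape job happens to expose metrics with the same names. The requests target, for example, becomes:

# pin the resource-request series to the kube-state-metrics scrape job
sum(kube_pod_container_resource_requests{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="cpu"})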
@ -21,11 +21,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -109,11 +112,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -251,7 +257,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -260,7 +266,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -269,7 +275,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -278,7 +284,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -344,11 +350,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -432,11 +441,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -574,7 +586,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -583,7 +595,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -592,7 +604,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -601,7 +613,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -669,10 +681,12 @@
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -816,7 +830,7 @@
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -825,7 +839,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -834,7 +848,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -843,7 +857,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -852,7 +866,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -861,7 +875,7 @@
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -927,11 +941,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 6,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -951,7 +968,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1003,11 +1020,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 7,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -1027,7 +1047,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1091,11 +1111,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 8,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -1115,7 +1138,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1167,11 +1190,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 9,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -1191,7 +1217,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1255,11 +1281,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 10,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -1279,7 +1308,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1331,11 +1360,14 @@
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 11,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -1355,7 +1387,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1419,11 +1451,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1443,7 +1478,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1495,11 +1530,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1519,7 +1557,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1606,7 +1644,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1629,30 +1667,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "workload",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1675,7 +1690,30 @@
"multi": false,
"name": "type",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "workload",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -21,11 +21,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -148,11 +151,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -329,7 +335,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -338,7 +344,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -347,7 +353,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -356,7 +362,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -422,11 +428,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -469,7 +478,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}} - {{workload_type}}",
@ -549,11 +558,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -721,7 +733,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -730,7 +742,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -739,7 +751,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -748,7 +760,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -757,7 +769,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -825,10 +837,12 @@
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -987,7 +1001,7 @@
],
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -996,7 +1010,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1005,7 +1019,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1014,7 +1028,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1023,7 +1037,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1032,7 +1046,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1098,11 +1112,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1122,7 +1139,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1174,11 +1191,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1198,7 +1218,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1262,11 +1282,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1286,7 +1309,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1338,11 +1361,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1362,7 +1388,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1426,11 +1452,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1450,7 +1479,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1502,11 +1531,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1526,7 +1558,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1590,11 +1622,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1614,7 +1649,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1666,11 +1701,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1690,7 +1728,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1777,7 +1815,30 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1814,29 +1875,6 @@
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
@ -154,7 +154,7 @@
"refId": "A"
}
],
"title": "Running Container",
"title": "Running Containers",
"transparent": false,
"type": "stat"
},
@ -294,7 +294,7 @@
"pluginVersion": "7",
"targets": [
{
"expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))",
"expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -347,7 +347,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
"expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -432,7 +432,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -517,7 +517,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -602,14 +602,14 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
"expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} pod",
"refId": "A"
},
{
"expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
"expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} worker",
@ -694,14 +694,14 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} pod",
"refId": "A"
},
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} worker",
@ -788,7 +788,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -875,7 +875,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -962,7 +962,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
"expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -1047,7 +1047,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{operation_type}}",
@ -1132,7 +1132,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -1218,7 +1218,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)",
"expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1303,7 +1303,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1388,7 +1388,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1473,28 +1473,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -1579,7 +1579,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{verb}} {{url}}",
@ -1749,7 +1749,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1893,7 +1893,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1927,11 +1927,11 @@
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": null,
"label": "instance",
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\"}, instance)",
"query": "label_values(up{job=\"kubelet\",cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1137,7 +1137,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1349,7 +1349,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -26,13 +26,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -124,7 +125,11 @@
},
"gridPos": { },
"id": 3,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -207,13 +212,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -305,7 +311,11 @@
},
"gridPos": { },
"id": 5,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -388,7 +398,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -406,7 +416,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)",
"query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -903,7 +903,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -36,7 +36,11 @@
},
"gridPos": { },
"id": 2,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -106,13 +110,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -134,7 +139,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "rate",
@ -187,6 +192,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -215,7 +221,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -281,13 +287,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -309,7 +316,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "rate",
@ -362,6 +369,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -390,7 +398,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -456,13 +464,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -484,28 +493,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -558,13 +567,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -586,7 +596,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -652,6 +662,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -680,7 +691,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -746,13 +757,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -827,13 +839,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -855,7 +868,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[5m])",
|
||||
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
@ -908,13 +921,14 @@
|
||||
"fillGradient": 0,
|
||||
"gridPos": { },
|
||||
"id": 12,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
@ -1002,7 +1016,7 @@
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
@ -1020,7 +1034,7 @@
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"query": "label_values(up{job=\"kube-proxy\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
@ -1040,7 +1054,7 @@
|
||||
"multi": false,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\"}, instance)",
|
||||
"query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
|
@ -36,7 +36,11 @@
},
"gridPos": { },
"id": 2,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -106,6 +110,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -134,28 +139,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A"
},
{
"expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B"
},
{
"expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C"
},
{
"expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} volume",
@ -208,6 +213,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -236,28 +242,28 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} volume",
@ -323,13 +329,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -351,28 +358,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -425,13 +432,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -453,7 +461,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -519,6 +527,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -547,7 +556,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -613,13 +622,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -694,13 +704,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -722,7 +733,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -775,13 +786,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -869,7 +881,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -907,7 +919,7 @@
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\"}, instance)",
"query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

@ -89,7 +89,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -184,7 +184,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -290,7 +290,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -385,7 +385,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -506,7 +506,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -597,7 +597,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -699,7 +699,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -790,7 +790,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -901,7 +901,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -992,7 +992,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -1061,7 +1061,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1079,7 +1079,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
@ -1099,14 +1099,14 @@
"value": "kube-system"
},
"datasource": "$datasource",
"definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"definition": "label_values(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"query": "label_values(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,

@ -8,7 +8,7 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "release-0.9"
"version": "release-0.11"
}
],
"legacyImports": true

@ -8,8 +8,8 @@
"subdir": "grafana"
}
},
"version": "90f38916f1f8a310a715d18e36f787f84df4ddf5",
"sum": "0kZ1pnuIirDtbg6F9at5+NQOwKNONIGEPq0eECzvRkI="
"version": "d039275e4916aceae1c137120882e01d857787ac",
"sum": "515vMn4x4tP8vegL4HLW0nDO5+njGTgnDZB5OOhtsCI="
},
{
"source": {
@ -18,9 +18,19 @@
"subdir": "contrib/mixin"
}
},
"version": "74aa38ec10bc22d34ffd204f46df6e460b78d855",
"version": "19002cfc689fba2b8f56605e5797bf79f8b61fdd",
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafana.git",
"subdir": "grafana-mixin"
}
},
"version": "3eed09056849ab873b867b561b7ce580ef2c75ba",
"sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
},
{
"source": {
"git": {
@ -38,7 +48,7 @@
"subdir": "grafana-builder"
}
},
"version": "c132c4afcf17491718539db4c2d94c0ea4346120",
"version": "dbf6fc14105c28b6fd0253005f7ca2da37d3d4e1",
"sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
},
{
@ -48,8 +58,8 @@
"subdir": ""
}
},
"version": "fb9d8ed4bc4a3d6efac525f72e8a0d2c583a0fe2",
"sum": "xjKkdp+5fkekCNBUIgZCHTRmVdUEmQNFKslrL2Ho8gs="
"version": "b8f44bb7be728423836bef0e904ec7166895a34b",
"sum": "LCgSosxceeYuoau5fYSPtE5eXOFe46DxexfkrctUv7c="
},
{
"source": {
@ -58,7 +68,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "eb98d4f74e8ac9c30b1f0e815b07bed31da76c8f",
"version": "5e44626d70c2bf2d35c37f3fee5a6261a5335cc6",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
},
{
@ -68,8 +78,8 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "f170cc73f11c1580d7f38af746be0f2fa79c6a1e",
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
"version": "0567e1e1b981755e563d2244fa1659563f2cddbc",
"sum": "P0dCnbzyPScQGNXwXRcwiPkMLeTq0IPNbSTysDbySnM="
},
{
"source": {
@ -78,7 +88,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "f170cc73f11c1580d7f38af746be0f2fa79c6a1e",
"version": "0567e1e1b981755e563d2244fa1659563f2cddbc",
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
},
{
@ -88,8 +98,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "452aaed72e36acb31cae93cfa85a5d9c3d3d2ec7",
"sum": "pupXEvlRbhLdEO9b8LfFZB66+Z7fEqvRZ9m3MyEvsv4="
"version": "e3066575dc8be21f578f12887563bda3ee7a2eff",
"sum": "nNEMDrb5sQDOxJ20ITDvldyfIbbiGcVr8Bq46PH2ww8="
},
{
"source": {
@ -98,8 +108,8 @@
"subdir": "jsonnet/mixin"
}
},
"version": "83fe36566f4e0894eb5ffcd2638a0f039a17bdeb",
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
"version": "5db6996d3ca995e66301c53c33959fd64c3f6ae6",
"sum": "GQmaVFJwKMiD/P4n3N2LrAZVcwutriWrP8joclDtBYQ=",
"name": "prometheus-operator-mixin"
},
{
@ -109,8 +119,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "83fe36566f4e0894eb5ffcd2638a0f039a17bdeb",
"sum": "J1G++A8hrtr3+OZQMmcNeb1w/C30bXqqwpwHL/Xhsd4="
"version": "5db6996d3ca995e66301c53c33959fd64c3f6ae6",
"sum": "pUggCYwO/3Y/p6Vgryx8Y4KO3QkJ+GqimrZtn/luzzI="
},
{
"source": {
@ -119,8 +129,8 @@
"subdir": "doc/alertmanager-mixin"
}
},
"version": "b408b522bc653d014e53035e59fa394cc1edd762",
"sum": "pep+dHzfIjh2SU5pEkwilMCAT/NoL6YYflV4x8cr7vU=",
"version": "14b01e6a34dd3155768c7e9bd5c4376055de9419",
"sum": "f3iZDUXQ/YWB5yDCY7VLD5bs442+3CdJgXJhJyWhNf8=",
"name": "alertmanager"
},
{
@ -130,8 +140,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "832909dd257eb368cf83363ffcae3ab84cb4bcb1",
"sum": "MmxGhE2PJ1a52mk2x7vDpMT2at4Jglbud/rK74CB5i0="
"version": "a2321e7b940ddcff26873612bccdf7cd4c42b6b6",
"sum": "MlWDAKGZ+JArozRKdKEvewHeWn8j2DNBzesJfLVd0dk="
},
{
"source": {
@ -140,10 +150,20 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "751ca03faddc9c64089c41d0da370a3a0b477742",
"sum": "AS8WYFi/z10BZSF6DFkKBscjB32XDMM7iIso7CO/FyI=",
"version": "d7e7b8e04b5ecdc1dd153534ba376a622b72741b",
"sum": "APXOIP3B3dZ3Tyh7L2UhyWR8Vbf5+9adTLz/ya7n6uU=",
"name": "prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/pyrra-dev/pyrra.git",
"subdir": "config/crd/bases"
}
},
"version": "3738a607a42a0c9566587a49cec7587cc92d61bd",
"sum": "GQ0GFKGdIWKx1b78VRs6jtC4SMqkBjT5jl65QUjPKK4="
},
{
"source": {
"git": {
@ -151,8 +171,8 @@
"subdir": "mixin"
}
},
"version": "ff363498fc95cfe17de894d7237bcf38bdd0bc36",
"sum": "cajthvLKDjYgYHCKQU2g/pTMRkxcbuJEvTnCyJOihl8=",
"version": "17c576472d80972bfd3705e1e0a08e6f8da8e04b",
"sum": "dBm9ML50quhu6dwTIgfNmVruMqfaUeQVCO/6EKtQLxE=",
"name": "thanos-mixin"
}
],

@ -6,10 +6,10 @@ dashboards:
url: https://grafana.com/api/dashboards/12539/revisions/5/download
tags: ['kubernetes', 'DNS']
- name: etcd
url: https://grafana.com/api/dashboards/3070/revisions/3/download
url: https://grafana.com/api/dashboards/15308/revisions/1/download
tags: ['kubernetes', 'etcd']
- name: node
url: https://grafana.com/api/dashboards/1860/revisions/23/download
url: https://grafana.com/api/dashboards/1860/revisions/27/download
tags: ['kubernetes']
# cd dashboards; for f in *.json; do echo "- name: ${f%%.json}" >> ../dashboards.yaml; echo " url: file://dashboards/$f" >> ../dashboards.yaml; done; cd -
- name: apiserver

@ -4,9 +4,10 @@
"metadata": {
"labels": {
"app.kubernetes.io/component": "alert-router",
"app.kubernetes.io/instance": "main",
"app.kubernetes.io/name": "alertmanager",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.22.2",
"app.kubernetes.io/version": "0.24.0",
"prometheus": "k8s",
"role": "alert-rules"
},

@ -41,6 +41,18 @@
"labels": {
"severity": "none"
}
},
{
"alert": "InfoInhibitor",
"annotations": {
"description": "This is an alert that is used to inhibit info alerts.\nBy themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with\nother alerts.\nThis alert fires whenever there's a severity=\"info\" alert, and stops firing when another alert with a\nseverity of 'warning' or 'critical' starts firing on the same namespace.\nThis alert should be routed to a null receiver and configured to inhibit alerts with severity=\"info\".\n",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor",
"summary": "Info-level alert inhibition."
},
"expr": "ALERTS{severity = \"info\"} == 1 unless on(namespace) ALERTS{alertname != \"InfoInhibitor\", severity =~ \"warning|critical\", alertstate=\"firing\"} == 1",
"labels": {
"severity": "none"
}
}
]
},
@ -86,7 +98,7 @@
"record": "cluster:node_cpu:sum_rate5m"
},
{
"expr": "cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))",
"expr": "cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))",
"record": "cluster:node_cpu:ratio"
}
]

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "kube-state-metrics",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.1.1",
"app.kubernetes.io/version": "2.5.0",
"prometheus": "k8s",
"role": "alert-rules"
},

@ -36,7 +36,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready",
"summary": "Pod has been in a non-ready state for more than 15 minutes."
},
"expr": "sum by (namespace, pod) (\n max by(namespace, pod) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (\n 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n",
"expr": "sum by (namespace, pod, cluster) (\n max by(namespace, pod, cluster) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (\n 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -114,7 +114,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck",
"summary": "DaemonSet rollout is stuck."
},
"expr": "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
"expr": "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -123,11 +123,11 @@
{
"alert": "KubeContainerWaiting",
"annotations": {
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting",
"summary": "Pod container waiting longer than 1 hour"
},
"expr": "sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\"}) > 0\n",
"expr": "sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\"}) > 0\n",
"for": "1h",
"labels": {
"severity": "warning"
@ -160,14 +160,13 @@
}
},
{
"alert": "KubeJobCompletion",
"alert": "KubeJobNotCompleted",
"annotations": {
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobcompletion",
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted",
"summary": "Job did not complete in time"
},
"expr": "kube_job_spec_completions{job=\"kube-state-metrics\"} - kube_job_status_succeeded{job=\"kube-state-metrics\"} > 0\n",
"for": "12h",
"expr": "time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\"}\n and\nkube_job_status_active{job=\"kube-state-metrics\"} > 0) > 43200\n",
"labels": {
"severity": "warning"
}
@ -232,7 +231,7 @@
{
"alert": "KubeMemoryOvercommit",
"annotations": {
"description": "Cluster has overcommitted memory resource requests for Pods by {{ $value }} bytes and cannot tolerate node failure.",
"description": "Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit",
"summary": "Cluster has overcommitted memory resource requests."
},
@ -249,7 +248,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit",
"summary": "Cluster has overcommitted CPU resource requests."
},
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n",
"expr": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"})\n > 1.5\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -262,7 +261,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit",
"summary": "Cluster has overcommitted memory resource requests."
},
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n",
"expr": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"})\n > 1.5\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -332,7 +331,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup",
"summary": "PersistentVolume is filling up."
},
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\n",
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1m",
"labels": {
"severity": "critical"
@ -345,7 +344,33 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup",
"summary": "PersistentVolume is filling up."
},
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\n",
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1h",
"labels": {
"severity": "warning"
}
},
{
"alert": "KubePersistentVolumeInodesFillingUp",
"annotations": {
"description": "The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup",
"summary": "PersistentVolumeInodes are filling up."
},
"expr": "(\n kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_inodes_used{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1m",
"labels": {
"severity": "critical"
}
},
{
"alert": "KubePersistentVolumeInodesFillingUp",
"annotations": {
"description": "Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup",
"summary": "PersistentVolumeInodes are filling up."
},
"expr": "(\n kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_inodes_used{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1h",
"labels": {
"severity": "warning"
@ -376,7 +401,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch",
"summary": "Different semantic versions of Kubernetes components running."
},
"expr": "count(count by (git_version) (label_replace(kubernetes_build_info{job!~\"kube-dns|coredns\"},\"git_version\",\"$1\",\"git_version\",\"(v[0-9]*.[0-9]*).*\"))) > 1\n",
"expr": "count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~\"kube-dns|coredns\"},\"git_version\",\"$1\",\"git_version\",\"(v[0-9]*.[0-9]*).*\"))) > 1\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -389,7 +414,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors",
"summary": "Kubernetes API server client is experiencing errors."
},
"expr": "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (instance, job, namespace))\n> 0.01\n",
"expr": "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (cluster, instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (cluster, instance, job, namespace))\n> 0.01\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -468,7 +493,7 @@
{
"alert": "KubeClientCertificateExpiration",
"annotations": {
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.",
"description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration",
"summary": "Client certificate is about to expire."
},
@ -480,7 +505,7 @@
{
"alert": "KubeClientCertificateExpiration",
"annotations": {
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.",
"description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration",
"summary": "Client certificate is about to expire."
},
@ -490,25 +515,25 @@
}
},
{
"alert": "AggregatedAPIErrors",
"alert": "KubeAggregatedAPIErrors",
"annotations": {
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors",
"summary": "An aggregated API has reported errors."
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors",
"summary": "Kubernetes aggregated API has reported errors."
},
"expr": "sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4\n",
"expr": "sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total[10m])) > 4\n",
"labels": {
"severity": "warning"
}
},
{
"alert": "AggregatedAPIDown",
"alert": "KubeAggregatedAPIDown",
"annotations": {
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown",
"summary": "An aggregated API is down."
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown",
"summary": "Kubernetes aggregated API is down."
},
"expr": "(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85\n",
"expr": "(1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -530,9 +555,9 @@
{
"alert": "KubeAPITerminatedRequests",
"annotations": {
"description": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests",
"summary": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
"summary": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
},
"expr": "sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) / ( sum(rate(apiserver_request_total{job=\"apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) ) > 0.20\n",
"for": "5m",
@ -578,10 +603,10 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods",
"summary": "Kubelet is running at capacity."
},
"expr": "count by(node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n",
"expr": "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n",
"for": "15m",
"labels": {
"severity": "warning"
"severity": "info"
}
},
{
@ -591,7 +616,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping",
"summary": "Node readiness status is flapping."
},
"expr": "sum(changes(kube_node_status_condition{status=\"true\",condition=\"Ready\"}[15m])) by (node) > 2\n",
"expr": "sum(changes(kube_node_status_condition{status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -617,7 +642,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh",
"summary": "Kubelet Pod startup latency is too high."
},
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\", metrics_path=\"/metrics\"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"} > 60\n",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\", metrics_path=\"/metrics\"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"} > 60\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -752,98 +777,98 @@
"name": "kube-apiserver-burnrate.rules",
"rules": [
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate2h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate30m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate3d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate5m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate6h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate1d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate1h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate2h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate30m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate3d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
"labels": {
"verb": "write"
},
"record": "apiserver_request:burnrate5m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
"labels": {
"verb": "write"
},
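The apiserver_request:burnrate<window> records above compute, per window, the ratio of too-slow plus 5xx requests to all requests. The change here swaps the apiserver_request_duration_seconds histogram for the SLO-specific apiserver_request_slo_duration_seconds one, excludes the streaming subresources (proxy, attach, log, exec, portforward) that would skew latency, and tightens the cluster-scope read bucket from le="1"/le="5"/le="40" to a 30s ceiling. A minimal sketch of how such records feed a multi-window, multi-burn-rate alert in the kubernetes-mixin style (the 14.4 factor and 1% error budget are illustrative, not necessarily the chart's exact thresholds):

# fire only when both the long and the short window burn the budget fast
apiserver_request:burnrate1h{verb="read"} > (14.40 * 0.01000)
and
apiserver_request:burnrate5m{verb="read"} > (14.40 * 0.01000)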
@ -855,41 +880,20 @@
"name": "kube-apiserver-histogram.rules",
"rules": [
{
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))) > 0\n",
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))) > 0\n",
"labels": {
"quantile": "0.99",
"verb": "read"
},
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
"record": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))) > 0\n",
"expr": "histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))) > 0\n",
"labels": {
"quantile": "0.99",
"verb": "write"
},
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
"labels": {
"quantile": "0.99"
},
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
"labels": {
"quantile": "0.9"
},
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
"labels": {
"quantile": "0.5"
},
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
"record": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile"
}
]
},
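The kube-apiserver-histogram.rules group precomputes p99 (and p90/p50) request latency; the read and write p99 records now derive from the SLO histogram and are renamed to match. A sketch of reading the precomputed quantile back out, e.g. for a dashboard panel (assumes the renamed record above is being evaluated by the same Prometheus):

# p99 read latency per resource from the recording rule, in seconds
cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb="read",quantile="0.99"}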
@ -916,21 +920,37 @@
"record": "code:apiserver_request_total:increase30d"
},
{
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n ) +\n (\n # read too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"LIST|GET\"}[30d]))\n -\n (\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"expr": "sum by (cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count[1h]))\n",
"record": "cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h"
},
{
"expr": "sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d"
},
{
"expr": "sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h]))\n",
"record": "cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h"
},
{
"expr": "sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30)\n",
"record": "cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d"
},
{
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"labels": {
"verb": "all"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30d]))\n -\n (\n # too slow\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
"labels": {
"verb": "write"
},
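The availability30d records are rebuilt here on top of the new hourly helper records: instead of running increase(...[30d]) over the raw histogram series, the rules take avg_over_time of the 1h increases and scale by 24 * 30, which keeps the 30-day queries cheap. The idea, and a sketch of checking the remaining error budget (the 99% target is illustrative, not necessarily the chart's configured SLO):

# increase30d ≈ avg_over_time(increase1h[30d]) * 24 * 30
# remaining 30d error budget for reads, assuming a 99% availability target
(apiserver_request:availability30d{verb="read"} - 0.99) / (1 - 0.99)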
@ -992,7 +1012,7 @@
"record": "node_namespace_pod_container:container_memory_swap"
},
{
"expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests"
},
{
@ -1000,7 +1020,7 @@
"record": "namespace_memory:kube_pod_container_resource_requests:sum"
},
{
"expr": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests"
},
{
@ -1008,7 +1028,7 @@
"record": "namespace_cpu:kube_pod_container_resource_requests:sum"
},
{
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits"
},
{
@ -1016,7 +1036,7 @@
"record": "namespace_memory:kube_pod_container_resource_limits:sum"
},
{
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n",
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n",
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits"
},
{
@ -1043,6 +1063,13 @@
"workload_type": "statefulset"
},
"record": "namespace_workload_pod:kube_pod_owner:relabel"
},
{
"expr": "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"Job\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n",
"labels": {
"workload_type": "job"
},
"record": "namespace_workload_pod:kube_pod_owner:relabel"
}
]
},
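The requests/limits hunks above all make the same fix: the inner max by (namespace, pod) gains a cluster label so the group_left() join stays one-to-one when several clusters feed a single Prometheus. The pattern, sketched on its own:

# resource requests restricted to pods that are actually Pending or Running,
# deduplicated per cluster so the vector match cannot collide across clusters
kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"}
  * on (namespace, pod, cluster) group_left()
max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)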
@ -1118,7 +1145,7 @@
"name": "node.rules",
"rules": [
{
"expr": "topk by(namespace, pod) (1,\n max by (node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n",
"expr": "topk by(cluster, namespace, pod) (1,\n max by (cluster, node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n",
"record": "node_namespace_pod:kube_pod_info:"
},
{
@ -1128,6 +1155,10 @@
{
"expr": "sum(\n node_memory_MemAvailable_bytes{job=\"node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"} +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n) by (cluster)\n",
"record": ":node_memory_MemAvailable_bytes:sum"
},
{
"expr": "sum(rate(node_cpu_seconds_total{job=\"node-exporter\",mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m])) /\ncount(sum(node_cpu_seconds_total{job=\"node-exporter\"}) by (cluster, instance, cpu))\n",
"record": "cluster:node_cpu:ratio_rate5m"
}
]
},
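The :node_memory_MemAvailable_bytes:sum record added above prefers the kernel's MemAvailable gauge and falls back to an approximation for kernels that predate it (< 3.14). The fallback, sketched standalone:

# MemAvailable if exported, else Buffers + Cached + MemFree + Slab
node_memory_MemAvailable_bytes{job="node-exporter"}
or
(
  node_memory_Buffers_bytes{job="node-exporter"}
  + node_memory_Cached_bytes{job="node-exporter"}
  + node_memory_MemFree_bytes{job="node-exporter"}
  + node_memory_Slab_bytes{job="node-exporter"}
)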
@ -1135,21 +1166,21 @@
"name": "kubelet.rules",
"rules": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"labels": {
"quantile": "0.99"
},
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"expr": "histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"labels": {
"quantile": "0.9"
},
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
},
{
"expr": "histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"expr": "histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n",
"labels": {
"quantile": "0.5"
},
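The kubelet.rules quantiles now aggregate by (cluster, instance, le) and join on (cluster, instance), so PLEG relist percentiles stay separated per cluster. A sketch of consuming the record (persistently slow relists usually point at an overloaded container runtime on that node):

# per-node p99 PLEG relist latency, keyed by the node label joined in above
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"}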
@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "node-exporter",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "1.2.2",
"app.kubernetes.io/version": "1.3.1",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -25,7 +25,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup",
"summary": "Filesystem is predicted to run out of space within the next 24 hours."
},
"expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 40\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n",
"expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n",
"for": "1h",
"labels": {
"severity": "warning"
@ -38,7 +38,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup",
"summary": "Filesystem is predicted to run out of space within the next 4 hours."
},
"expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n",
"expr": "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 10\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n",
"for": "1h",
"labels": {
"severity": "critical"
@ -255,11 +255,11 @@
"name": "node-exporter.rules",
"rules": [
{
"expr": "count without (cpu) (\n count without (mode) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n )\n)\n",
"expr": "count without (cpu, mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}\n)\n",
"record": "instance:node_num_cpu:sum"
},
{
"expr": "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[5m])\n)\n",
"expr": "1 - avg without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n)\n",
"record": "instance:node_cpu_utilisation:rate5m"
},
{
@ -267,7 +267,7 @@
"record": "instance:node_load1_per_cpu:ratio"
},
{
"expr": "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
"expr": "1 - (\n (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
"record": "instance:node_memory_utilisation:ratio"
},
{
@ -6,7 +6,7 @@
"app.kubernetes.io/component": "controller",
"app.kubernetes.io/name": "prometheus-operator",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.49.0",
"app.kubernetes.io/version": "0.57.0",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -38,7 +38,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors",
"summary": "Errors while performing watch operations in controller."
},
"expr": "(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job=\"prometheus-operator\",namespace=\"monitoring\"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job=\"prometheus-operator\",namespace=\"monitoring\"}[10m]))) > 0.4\n",
"expr": "(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job=\"prometheus-operator\",namespace=\"monitoring\"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job=\"prometheus-operator\",namespace=\"monitoring\"}[5m]))) > 0.4\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -90,7 +90,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready",
"summary": "Prometheus operator not ready"
},
"expr": "min by(namespace, controller) (max_over_time(prometheus_operator_ready{job=\"prometheus-operator\",namespace=\"monitoring\"}[5m]) == 0)\n",
"expr": "min by (controller,namespace) (max_over_time(prometheus_operator_ready{job=\"prometheus-operator\",namespace=\"monitoring\"}[5m]) == 0)\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -110,6 +110,24 @@
}
}
]
},
{
"name": "config-reloaders",
"rules": [
{
"alert": "ConfigReloaderSidecarErrors",
"annotations": {
"description": "Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.\nAs a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors",
"summary": "config-reloader sidecar has not had a successful reload for 10m"
},
"expr": "max_over_time(reloader_last_reload_successful{namespace=~\".+\"}[5m]) == 0\n",
"for": "10m",
"labels": {
"severity": "warning"
}
}
]
}
]
}
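The new config-reloaders group watches the reloader sidecars that ship alongside Prometheus and Alertmanager: reloader_last_reload_successful is a gauge (1 = last reload succeeded, 0 = failed), and the alert fires once it has read 0 for 10 minutes. An ad-hoc check of the same signal, as a sketch:

# pods whose most recent config reload has been failing over the last 5m
max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0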
@ -4,9 +4,10 @@
"metadata": {
"labels": {
"app.kubernetes.io/component": "prometheus",
"app.kubernetes.io/instance": "k8s",
"app.kubernetes.io/name": "prometheus",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.29.1",
"app.kubernetes.io/version": "2.36.1",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -226,6 +227,32 @@
"severity": "warning"
}
},
{
"alert": "PrometheusScrapeBodySizeLimitHit",
"annotations": {
"description": "Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf \"%.0f\" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit",
"summary": "Prometheus has dropped some targets that exceeded body size limit."
},
"expr": "increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]) > 0\n",
"for": "15m",
"labels": {
"severity": "warning"
}
},
{
"alert": "PrometheusScrapeSampleLimitHit",
"annotations": {
"description": "Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf \"%.0f\" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit",
"summary": "Prometheus has failed scrapes that have exceeded the configured sample limit."
},
"expr": "increase(prometheus_target_scrapes_exceeded_sample_limit_total{job=\"prometheus-k8s\",namespace=\"monitoring\"}[5m]) > 0\n",
"for": "15m",
"labels": {
"severity": "warning"
}
},
{
"alert": "PrometheusTargetSyncFailure",
"annotations": {
File diff suppressed because one or more lines are too long
@ -36,6 +36,26 @@ spec:
expr: vector(1)
labels:
severity: none
- alert: InfoInhibitor
annotations:
description: 'This is an alert that is used to inhibit info alerts.

By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with

other alerts.

This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a

severity of ''warning'' or ''critical'' starts firing on the same namespace.

This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".

'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
summary: Info-level alert inhibition.
expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
labels:
severity: none
- name: node-network
rules:
- alert: NodeNetworkInterfaceFlapping
@ -61,7 +81,7 @@ spec:
record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
- expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
record: cluster:node_cpu:ratio
- name: kube-prometheus-general.rules
rules:
@ -25,7 +25,7 @@ spec:
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes.
expr: "sum by (namespace, pod) (\n max by(namespace, pod) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (\n 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n"
expr: "sum by (namespace, pod, cluster) (\n max by(namespace, pod, cluster) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (\n 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n"
for: 15m
labels:
severity: warning
@ -79,16 +79,16 @@ spec:
description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck.
expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
for: 15m
labels:
severity: warning
- alert: KubeContainerWaiting
annotations:
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
description: pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
expr: 'sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
expr: 'sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
'
for: 1h
@ -114,15 +114,12 @@ spec:
for: 15m
labels:
severity: warning
- alert: KubeJobCompletion
- alert: KubeJobNotCompleted
annotations:
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than 12 hours to complete.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobcompletion
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
summary: Job did not complete in time
expr: 'kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
'
for: 12h
expr: "time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\"}\n and\nkube_job_status_active{job=\"kube-state-metrics\"} > 0) > 43200\n"
labels:
severity: warning
- alert: KubeJobFailed
@ -173,7 +170,7 @@ spec:
severity: warning
- alert: KubeMemoryOvercommit
annotations:
description: Cluster has overcommitted memory resource requests for Pods by {{`{{`}} $value {{`}}`}} bytes and cannot tolerate node failure.
description: Cluster has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
summary: Cluster has overcommitted memory resource requests.
expr: 'sum(namespace_memory:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
@ -191,7 +188,7 @@ spec:
description: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests.
expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n"
expr: "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"})\n > 1.5\n"
for: 5m
labels:
severity: warning
@ -200,7 +197,7 @@ spec:
description: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests.
expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n"
expr: "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"})\n > 1.5\n"
for: 5m
labels:
severity: warning
@ -247,7 +244,7 @@ spec:
description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\n"
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1m
labels:
severity: critical
@ -256,7 +253,25 @@ spec:
description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\n"
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1h
labels:
severity: warning
- alert: KubePersistentVolumeInodesFillingUp
annotations:
description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} only has {{`{{`}} $value | humanizePercentage {{`}}`}} free inodes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
summary: PersistentVolumeInodes are filling up.
expr: "(\n kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_inodes_used{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1m
labels:
severity: critical
- alert: KubePersistentVolumeInodesFillingUp
annotations:
description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to run out of inodes within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} of its inodes are free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
summary: PersistentVolumeInodes are filling up.
expr: "(\n kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_inodes_used{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_inodes_free{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1h
labels:
severity: warning
@ -278,7 +293,7 @@ spec:
description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch
summary: Different semantic versions of Kubernetes components running.
expr: 'count(count by (git_version) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1
expr: 'count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1
|
||||
|
||||
'
|
||||
for: 15m
|
||||
@ -289,7 +304,7 @@ spec:
description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
summary: Kubernetes API server client is experiencing errors.
expr: "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (instance, job, namespace))\n> 0.01\n"
expr: "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (cluster, instance, job, namespace)\n /\nsum(rate(rest_client_requests_total[5m])) by (cluster, instance, job, namespace))\n> 0.01\n"
for: 15m
labels:
severity: warning
@ -367,7 +382,7 @@ spec:
rules:
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
@ -377,7 +392,7 @@ spec:
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
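The expiry check here is a percentile trick: histogram_quantile(0.01, ...) returns the 1st-percentile remaining lifetime across all client certificates the apiserver has seen, so the alert fires as soon as the shortest-lived ~1% of certificates drop below the threshold (604800s = 7 days for the warning, 86400s = 24 hours for the critical). A sketch of the same pattern against a hypothetical my_cert_expiry_seconds_bucket histogram (PromQL; the metric name is an assumption for illustration):

    # 1st percentile of remaining certificate lifetime, in seconds
    histogram_quantile(0.01, sum by (le) (rate(my_cert_expiry_seconds_bucket[5m]))) < 7 * 24 * 3600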
@ -385,22 +400,22 @@ spec:
'
labels:
severity: critical
- alert: AggregatedAPIErrors
- alert: KubeAggregatedAPIErrors
annotations:
description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors
summary: An aggregated API has reported errors.
expr: 'sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors
summary: Kubernetes aggregated API has reported errors.
expr: 'sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total[10m])) > 4

'
labels:
severity: warning
- alert: AggregatedAPIDown
- alert: KubeAggregatedAPIDown
annotations:
description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown
summary: An aggregated API is down.
expr: '(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown
summary: Kubernetes aggregated API is down.
expr: '(1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85

'
for: 5m
@ -419,9 +434,9 @@ spec:
severity: critical
- alert: KubeAPITerminatedRequests
annotations:
description: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
summary: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
expr: 'sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20

'
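Note the denominator in this expression: terminations are counted against served plus terminated requests, so the result is a true fraction of all incoming traffic rather than a ratio that can exceed 1. As a worked example, an apiserver serving 80 req/s while terminating 20 req/s yields 20 / (80 + 20) = 0.20, exactly at the firing threshold.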
@ -457,16 +472,16 @@ spec:
description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods
summary: Kubelet is running at capacity.
expr: "count by(node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n"
expr: "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n"
for: 15m
labels:
severity: warning
severity: info
- alert: KubeNodeReadinessFlapping
annotations:
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping
summary: Node readiness status is flapping.
expr: 'sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
expr: 'sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (cluster, node) > 2

'
for: 15m
@ -488,7 +503,7 @@ spec:
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh
summary: Kubelet Pod startup latency is too high.
expr: 'histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
expr: 'histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60

'
for: 15m
@ -595,96 +610,85 @@ spec:
severity: critical
- name: kube-apiserver-burnrate.rules
rules:
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
labels:
verb: read
record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
labels:
verb: read
record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
labels:
verb: read
record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"\
,code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
labels:
verb: read
record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
labels:
verb: read
record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
labels:
verb: read
record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
labels:
verb: read
record: apiserver_request:burnrate6h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
labels:
verb: write
record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
labels:
verb: write
record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
labels:
verb: write
record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
labels:
verb: write
record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
labels:
verb: write
record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
labels:
verb: write
record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_slo_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
labels:
verb: write
record: apiserver_request:burnrate6h
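Each burnrate record divides the rate of bad requests (too slow for their latency SLO, or answered 5xx) by the total request rate over one window, so the recorded series is the fraction of the error budget being burned at that moment, per verb class. Alerts then combine a long and a short window in the multi-window, multi-burn-rate style so that a sustained burn pages but a brief spike does not. A sketch of how such an alert typically consumes these records (the 14.4 factor and 1% SLO target are illustrative, not values from this chart):

    expr: |
      sum(apiserver_request:burnrate1h{verb="read"}) > (14.40 * 0.01000)
      and
      sum(apiserver_request:burnrate5m{verb="read"}) > (14.40 * 0.01000)
    for: 2m
    labels:
      severity: critical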
- name: kube-apiserver-histogram.rules
rules:
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0

'
labels:
quantile: '0.99'
verb: read
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_slo_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0

'
labels:
quantile: '0.99'
verb: write
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))

'
labels:
quantile: '0.99'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))

'
labels:
quantile: '0.9'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))

'
labels:
quantile: '0.5'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
record: cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile
- interval: 3m
name: kube-apiserver-availability.rules
rules:
@ -704,16 +708,32 @@ spec:
labels:
verb: write
record: code:apiserver_request_total:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n ) +\n (\n # read too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"LIST|GET\"}[30d]))\n -\n (\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"}\
\ or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
- expr: 'sum by (cluster, verb, scope) (increase(apiserver_request_slo_duration_seconds_count[1h]))

'
record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h
- expr: 'sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase1h[30d]) * 24 * 30)

'
record: cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d
- expr: 'sum by (cluster, verb, scope, le) (increase(apiserver_request_slo_duration_seconds_bucket[1h]))

'
record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h
- expr: 'sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase1h[30d]) * 24 * 30)

'
record: cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"\
cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
labels:
verb: all
record: apiserver_request:availability30d
- expr: "1 - (\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30d]))\n -\n (\n # too slow\n (\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30d]))\n +\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
labels:
verb: read
record: apiserver_request:availability30d
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_slo_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_slo_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
labels:
verb: write
record: apiserver_request:availability30d
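The new increase1h/increase30d pair is a cheap long-range rollup: instead of evaluating 30 days of raw histogram samples on every rule run, the 1h increases are recorded, and the 30-day figure is reconstructed as the average hourly increase times 24 * 30 hours. As a sanity check on the arithmetic: if a counter grows by roughly 1000 per hour on average, avg_over_time(increase1h[30d]) is about 1000, and the rule records 1000 * 24 * 30 = 720000, matching the true 30-day increase while reading only the pre-aggregated series.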
@ -757,19 +777,19 @@ spec:
record: node_namespace_pod_container:container_memory_cache
- expr: "container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n"
record: node_namespace_pod_container:container_memory_swap
- expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_memory:kube_pod_container_resource_requests:sum
- expr: "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_requests:sum
- expr: "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_memory:kube_pod_container_resource_limits:sum
- expr: "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n"
- expr: "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n"
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_limits:sum
@ -785,6 +805,10 @@ spec:
labels:
workload_type: statefulset
record: namespace_workload_pod:kube_pod_owner:relabel
- expr: "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"Job\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n"
labels:
workload_type: job
record: namespace_workload_pod:kube_pod_owner:relabel
- name: kube-scheduler.rules
rules:
- expr: 'histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
@ -843,27 +867,33 @@ spec:
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- name: node.rules
rules:
- expr: "topk by(namespace, pod) (1,\n max by (node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n"
- expr: "topk by(cluster, namespace, pod) (1,\n max by (cluster, node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n"
record: 'node_namespace_pod:kube_pod_info:'
- expr: "count by (cluster, node) (sum by (node, cpu) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n* on (namespace, pod) group_left(node)\n topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:)\n))\n"
record: node:node_num_cpu:sum
- expr: "sum(\n node_memory_MemAvailable_bytes{job=\"node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"} +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n) by (cluster)\n"
record: :node_memory_MemAvailable_bytes:sum
- expr: 'sum(rate(node_cpu_seconds_total{job="node-exporter",mode!="idle",mode!="iowait",mode!="steal"}[5m])) /

count(sum(node_cpu_seconds_total{job="node-exporter"}) by (cluster, instance, cpu))

'
record: cluster:node_cpu:ratio_rate5m
- name: kubelet.rules
rules:
- expr: 'histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- expr: 'histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})

'
labels:
quantile: '0.99'
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- expr: 'histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})

'
labels:
quantile: '0.9'
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- expr: 'histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- expr: 'histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (cluster, instance, le) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})

'
labels:
|
@ -14,7 +14,7 @@ spec:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 40\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n"
expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 24*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n"
for: 1h
labels:
severity: warning
@ -23,7 +23,7 @@ spec:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 15\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n"
expr: "(\n node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\"} * 100 < 10\nand\n predict_linear(node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\"}[6h], 4*60*60) < 0\nand\n node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\"} == 0\n)\n"
for: 1h
labels:
severity: critical
@ -188,13 +188,13 @@ spec:
severity: critical
- name: node-exporter.rules
rules:
- expr: "count without (cpu) (\n count without (mode) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n )\n)\n"
- expr: "count without (cpu, mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}\n)\n"
record: instance:node_num_cpu:sum
- expr: "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[5m])\n)\n"
- expr: "1 - avg without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n)\n"
record: instance:node_cpu_utilisation:rate5m
- expr: "(\n node_load1{job=\"node-exporter\"}\n/\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n"
record: instance:node_load1_per_cpu:ratio
- expr: "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n"
- expr: "1 - (\n (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n"
record: instance:node_memory_utilisation:ratio
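The CPU utilisation recording changes semantics, not just shape: the old rule counted everything that is not idle as busy, while the new one also subtracts iowait and steal, so time spent waiting on disk or stolen by the hypervisor no longer inflates utilisation. A sketch of the new aggregation order (PromQL, same metric as above):

    # sum idle+iowait+steal per CPU first, then average across CPUs;
    # averaging after the per-mode sum keeps per-CPU rates from skewing the mean
    1 - avg without (cpu) (
      sum without (mode) (rate(node_cpu_seconds_total{mode=~"idle|iowait|steal"}[5m]))
    )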
- expr: 'rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])
|
@ -25,7 +25,7 @@ spec:
description: Errors while performing watch operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors
summary: Errors while performing watch operations in controller.
expr: '(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
expr: '(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.4

'
for: 15m
@ -69,7 +69,7 @@ spec:
description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace isn't ready to reconcile {{`{{`}} $labels.controller {{`}}`}} resources.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready
summary: Prometheus operator not ready
expr: 'min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0)
expr: 'min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0)

'
for: 5m
@ -86,4 +86,19 @@ spec:
for: 5m
labels:
severity: warning
- name: config-reloaders
rules:
- alert: ConfigReloaderSidecarErrors
annotations:
description: 'Errors encountered while the {{`{{`}}$labels.pod{{`}}`}} config-reloader sidecar attempts to sync config in {{`{{`}}$labels.namespace{{`}}`}} namespace.

As a result, configuration for service running in {{`{{`}}$labels.pod{{`}}`}} may be stale and cannot be updated anymore.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors
summary: config-reloader sidecar has not had a successful reload for 10m
expr: 'max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0

'
for: 10m
labels:
severity: warning
|
@ -181,6 +181,28 @@ spec:
for: 15m
labels:
severity: warning
- alert: PrometheusScrapeBodySizeLimitHit
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured body_size_limit.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit
summary: Prometheus has dropped some targets that exceeded body size limit.
expr: 'increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0

'
for: 15m
labels:
severity: warning
- alert: PrometheusScrapeSampleLimitHit
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured sample_limit.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit
summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
expr: 'increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0

'
for: 15m
labels:
severity: warning
- alert: PrometheusTargetSyncFailure
annotations:
description: '{{`{{`}} printf "%.0f" $value {{`}}`}} targets in Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} have failed to sync because invalid configuration was supplied.'
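Both limit alerts only ever fire if the corresponding limits are actually set on the scrape side; with no limits configured, the exceeded counters stay at zero. A hedged sketch of where those knobs live in a plain Prometheus scrape_config (the field names are upstream Prometheus settings; the job name, values, and target are illustrative):

    scrape_configs:
      - job_name: example
        sample_limit: 50000        # scrape fails if the target exposes more samples
        body_size_limit: 10MB      # scrape fails if the response body is larger
        static_configs:
          - targets: ['app:9090']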
|
@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-storage
description: KubeZero umbrella chart for all things storage incl. AWS EBS/EFS, openEBS-lvm, gemini
type: application
version: 0.7.0
version: 0.7.1
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -28,7 +28,7 @@ dependencies:
condition: gemini.enabled
# repository: https://charts.fairwinds.com/stable
- name: aws-ebs-csi-driver
version: 2.10.1
version: 2.11.0
condition: aws-ebs-csi-driver.enabled
# repository: https://kubernetes-sigs.github.io/aws-ebs-csi-driver
- name: aws-efs-csi-driver
@ -1,9 +1,15 @@
# Helm chart

## v2.11.0
* Bump app/driver to version `v1.11.3`
* Add support for leader election tuning for `csi-provisioner` and `csi-attacher` ([#1371](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/1371), [@moogzy](https://github.com/moogzy))
* Change `fsGroupPolicy` to `File` ([#1377](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/1377), [@ConnorJC3](https://github.com/ConnorJC3))
* Allow all taint for `csi-node` by default ([#1381](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/1381), [@gtxu](https://github.com/gtxu))

## v2.10.1
* Bump app/driver to version `v1.11.2`

## 2.10.0
## v2.10.0
* Implement securityContext for containers
* Add securityContext for node pod
* Utilize more secure defaults for securityContext
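The leader election tuning added in v2.11.0 is exposed as plain chart values; a minimal sketch of overriding them at upgrade time, assuming the upstream repo is registered under the alias aws-ebs-csi-driver:

helm upgrade --install aws-ebs-csi-driver aws-ebs-csi-driver/aws-ebs-csi-driver \
  --set sidecars.provisioner.leaderElection.leaseDuration=15s \
  --set sidecars.attacher.leaderElection.renewDeadline=10s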
@ -3,7 +3,7 @@ annotations:
- kind: added
description: Custom controller.updateStrategy to set controller deployment strategy.
apiVersion: v2
appVersion: 1.11.2
appVersion: 1.11.3
description: A Helm chart for AWS EBS CSI Driver
home: https://github.com/kubernetes-sigs/aws-ebs-csi-driver
keywords:
@ -19,4 +19,4 @@ maintainers:
name: aws-ebs-csi-driver
sources:
- https://github.com/kubernetes-sigs/aws-ebs-csi-driver
version: 2.10.1
version: 2.11.0
@ -170,7 +170,18 @@ spec:
{{- if .Values.controller.extraCreateMetadata }}
- --extra-create-metadata
{{- end}}
- --leader-election=true
- --leader-election={{ .Values.sidecars.provisioner.leaderElection.enabled | required "leader election state for csi-provisioner is required, must be set to true || false." }}
{{- if .Values.sidecars.provisioner.leaderElection.enabled }}
{{- if .Values.sidecars.provisioner.leaderElection.leaseDuration }}
- --leader-election-lease-duration={{ .Values.sidecars.provisioner.leaderElection.leaseDuration }}
{{- end }}
{{- if .Values.sidecars.provisioner.leaderElection.renewDeadline}}
- --leader-election-renew-deadline={{ .Values.sidecars.provisioner.leaderElection.renewDeadline }}
{{- end }}
{{- if .Values.sidecars.provisioner.leaderElection.retryPeriod }}
- --leader-election-retry-period={{ .Values.sidecars.provisioner.leaderElection.retryPeriod }}
{{- end }}
{{- end }}
- --default-fstype={{ .Values.controller.defaultFsType }}
env:
- name: ADDRESS
@ -202,7 +213,18 @@ spec:
args:
- --csi-address=$(ADDRESS)
- --v={{ .Values.sidecars.attacher.logLevel }}
- --leader-election=true
- --leader-election={{ .Values.sidecars.attacher.leaderElection.enabled | required "leader election state for csi-attacher is required, must be set to true || false." }}
{{- if .Values.sidecars.attacher.leaderElection.enabled }}
{{- if .Values.sidecars.attacher.leaderElection.leaseDuration }}
- --leader-election-lease-duration={{ .Values.sidecars.attacher.leaderElection.leaseDuration }}
{{- end }}
{{- if .Values.sidecars.attacher.leaderElection.renewDeadline}}
- --leader-election-renew-deadline={{ .Values.sidecars.attacher.leaderElection.renewDeadline }}
{{- end }}
{{- if .Values.sidecars.attacher.leaderElection.retryPeriod }}
- --leader-election-retry-period={{ .Values.sidecars.attacher.leaderElection.retryPeriod }}
{{- end }}
{{- end }}
env:
- name: ADDRESS
value: /var/lib/csi/sockets/pluginproxy/csi.sock
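After templating, each sidecar should carry an explicit --leader-election flag plus whatever tuning flags were set; one way to eyeball the rendered args, assuming the chart's default controller Deployment name:

kubectl get deployment ebs-csi-controller -n kube-system -o yaml | grep -- --leader-election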
@ -7,3 +7,4 @@ metadata:
spec:
attachRequired: true
podInfoOnMount: false
fsGroupPolicy: File
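fsGroupPolicy: File tells the kubelet to apply the pod's fsGroup ownership to the volume unconditionally, instead of inferring support from fstype and access mode. To confirm the change landed on a live cluster, assuming the default driver name:

kubectl get csidriver ebs.csi.aws.com -o jsonpath='{.spec.fsGroupPolicy}'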
@ -40,8 +40,6 @@ spec:
{{- if .Values.node.tolerateAllTaints }}
- operator: Exists
{{- else }}
- key: CriticalAddonsOnly
operator: Exists
- operator: Exists
effect: NoExecute
tolerationSeconds: 300
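Since tolerateAllTaints now defaults to true (see the values change further down), the rendered DaemonSet carries a single blanket toleration; a quick check, assuming the default DaemonSet name:

kubectl get ds ebs-csi-node -n kube-system -o jsonpath='{.spec.template.spec.tolerations}'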
@ -40,8 +40,6 @@ spec:
{{- if .Values.node.tolerateAllTaints }}
- operator: Exists
{{- else }}
- key: CriticalAddonsOnly
operator: Exists
- operator: Exists
effect: NoExecute
tolerationSeconds: 300
@ -22,6 +22,16 @@ sidecars:
tag: "v3.1.0"
logLevel: 2
resources: {}
# Tune leader lease election for csi-provisioner.
# Leader election is on by default.
leaderElection:
enabled: true
# Optional values to tune lease behavior.
# The arguments provided must be in an acceptable time.ParseDuration format.
# Ref: https://pkg.go.dev/flag#Duration
# leaseDuration: "15s"
# renewDeadline: "10s"
# retryPeriod: "5s"
securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
@ -31,6 +41,16 @@ sidecars:
pullPolicy: IfNotPresent
repository: k8s.gcr.io/sig-storage/csi-attacher
tag: "v3.4.0"
# Tune leader lease election for csi-attacher.
# Leader election is on by default.
leaderElection:
enabled: true
# Optional values to tune lease behavior.
# The arguments provided must be in an acceptable time.ParseDuration format.
# Ref: https://pkg.go.dev/flag#Duration
# leaseDuration: "15s"
# renewDeadline: "10s"
# retryPeriod: "5s"
logLevel: 2
resources: {}
securityContext:
@ -183,7 +203,7 @@ node:
nodeSelector: {}
podAnnotations: {}
podLabels: {}
tolerateAllTaints: false
tolerateAllTaints: true
tolerations: []
resources: {}
serviceAccount:
@ -18,7 +18,7 @@
"subdir": "contrib/mixin"
}
},
"version": "74aa38ec10bc22d34ffd204f46df6e460b78d855",
"version": "19002cfc689fba2b8f56605e5797bf79f8b61fdd",
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
},
{
@ -38,7 +38,7 @@
"subdir": "grafana-builder"
}
},
"version": "c132c4afcf17491718539db4c2d94c0ea4346120",
"version": "dbf6fc14105c28b6fd0253005f7ca2da37d3d4e1",
"sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
},
{
@ -58,7 +58,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "eb98d4f74e8ac9c30b1f0e815b07bed31da76c8f",
"version": "5e44626d70c2bf2d35c37f3fee5a6261a5335cc6",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
},
{
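These version pins are not edited by hand; re-vendoring the jsonnet dependencies with jsonnet-bundler from the chart's jsonnet directory rewrites the lock file:

jb update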
@ -63,11 +63,6 @@ gemini:
aws-ebs-csi-driver:
enabled: false

# starting with 1.6 the ebs-plugin panics with "could not get number of attached ENIs"
# somewhere related to metadata / volumeattach limits and nitro instances ... AWS as usual
#image:
# tag: v1.5.3

controller:
replicaCount: 1
logLevel: 2
@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero
description: KubeZero - Root App of Apps chart
type: application
version: 1.23.10-3
version: 1.23.11
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -68,4 +68,4 @@ Kubernetes: `>= 1.20.0`
| storage.targetRevision | string | `"0.7.0"` | |

----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.9.1](https://github.com/norwoodj/helm-docs/releases/v1.9.1)
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
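The README footers above are regenerated rather than hand-edited; after bumping chart metadata, a run along these lines refreshes them with the pinned helm-docs release, assuming the charts live under charts/:

helm-docs --chart-search-root charts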
@ -22,7 +22,7 @@ cert-manager:

storage:
enabled: false
targetRevision: 0.7.0
targetRevision: 0.7.1
aws-ebs-csi-driver:
enabled: false
aws-efs-csi-driver:
@ -52,7 +52,7 @@ istio-private-ingress:
metrics:
enabled: false
namespace: monitoring
targetRevision: 0.8.1
targetRevision: 0.8.4
istio:
grafana: {}
prometheus: {}
@ -29,4 +29,4 @@ Kubernetes: `>= 1.20.0`
| manticoresearch.worker.volume.size | string | `"4Gi"` | |

----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.9.1](https://github.com/norwoodj/helm-docs/releases/v1.9.1)
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)