feat: metrics version bump

This commit is contained in:
Stefan Reimer 2021-12-09 19:03:55 +01:00
parent 4ab3d4e3d6
commit cd56f0a7aa
105 changed files with 5517 additions and 1354 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-metrics
description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
type: application
version: 0.5.4
version: 0.6.1
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -11,22 +11,23 @@ keywords:
- alertmanager
- grafana
maintainers:
- name: Quarky9
- name: Stefan Reimer
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: ">= 0.1.4"
repository: https://cdn.zero-downtime.net/charts/
- name: kube-prometheus-stack
version: 18.1.0
version: 23.1.5
# Switch back to upstream once all alerts are fixed eg. etcd gpcr
# repository: https://prometheus-community.github.io/helm-charts
- name: prometheus-adapter
version: 2.17
version: 3.0.0
repository: https://prometheus-community.github.io/helm-charts
condition: prometheus-adapter.enabled
- name: prometheus-pushgateway
version: 1.10.1
version: 1.13.0
# Switch back to upstream once namespaces are supported
# repository: https://prometheus-community.github.io/helm-charts
condition: prometheus-pushgateway.enabled
kubeVersion: ">= 1.18.0"
kubeVersion: ">= 1.20.0"

View File

@ -6,20 +6,20 @@ annotations:
url: https://github.com/prometheus-operator/kube-prometheus
artifacthub.io/operator: "true"
apiVersion: v2
appVersion: 0.50.0
appVersion: 0.52.0
dependencies:
- condition: kubeStateMetrics.enabled
name: kube-state-metrics
repository: https://prometheus-community.github.io/helm-charts
version: 3.4.*
version: 4.1.*
- condition: nodeExporter.enabled
name: prometheus-node-exporter
repository: https://prometheus-community.github.io/helm-charts
version: 2.0.*
version: 2.2.*
- condition: grafana.enabled
name: grafana
repository: https://grafana.github.io/helm-charts
version: 6.16.*
version: 6.18.*
description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards,
and Prometheus rules combined with documentation and scripts to provide easy to
operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus
@ -32,7 +32,6 @@ keywords:
- kube-prometheus
kubeVersion: '>=1.16.0-0'
maintainers:
- name: vsliouniaev
- name: bismarck
- email: gianrubio@gmail.com
name: gianrubio
@ -47,4 +46,4 @@ sources:
- https://github.com/prometheus-community/helm-charts
- https://github.com/prometheus-operator/kube-prometheus
type: application
version: 18.1.0
version: 23.1.5

View File

@ -83,7 +83,58 @@ _See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documen
A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions.
### From 22.x to 23.x
Port names have been renamed for Istio's
[explicit protocol selection](https://istio.io/latest/docs/ops/configuration/traffic-management/protocol-selection/#explicit-protocol-selection).
| | old value | new value |
|-|-----------|-----------|
| `alertmanager.alertmanagerSpec.portName` | `web` | `http-web` |
| `grafana.service.portName` | `service` | `http-web` |
| `prometheus-node-exporter.service.portName` | `metrics` (hardcoded) | `http-metrics` |
| `prometheus.prometheusSpec.portName` | `web` | `http-web` |
### From 21.x to 22.x
Due to the upgrade of the `kube-state-metrics` chart, removal of its deployment/stateful needs to done manually prior to upgrading:
```console
kubectl delete deployments.apps -l app.kubernetes.io/instance=prometheus-operator,app.kubernetes.io/name=kube-state-metrics --cascade=orphan
```
or if you use autosharding:
```console
kubectl delete statefulsets.apps -l app.kubernetes.io/instance=prometheus-operator,app.kubernetes.io/name=kube-state-metrics --cascade=orphan
```
### From 20.x to 21.x
The config reloader values have been refactored. All the values have been moved to the key `prometheusConfigReloader` and the limits and requests can now be set separately.
### From 19.x to 20.x
Version 20 upgrades prometheus-operator from 0.50.x to 0.52.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating:
```console
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
```
### From 18.x to 19.x
`kubeStateMetrics.serviceMonitor.namespaceOverride` was removed.
Please use `kube-state-metrics.namespaceOverride` instead.
### From 17.x to 18.x
Version 18 upgrades prometheus-operator from 0.49.x to 0.50.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating:
```console
@ -98,6 +149,7 @@ kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheu
```
### From 16.x to 17.x
Version 17 upgrades prometheus-operator from 0.48.x to 0.49.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating:
```console
@ -111,11 +163,12 @@ kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheu
kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
```
### From 15.x to 16.x
Version 16 upgrades kube-state-metrics to v2.0.0. This includes changed command-line arguments and removed metrics, see this [blog post](https://kubernetes.io/blog/2021/04/13/kube-state-metrics-v-2-0/). This version also removes Grafana dashboards that supported Kubernetes 1.14 or earlier.
### From 14.x to 15.x
Version 15 upgrades prometheus-operator from 0.46.x to 0.47.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating:
```console

View File

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 8.1.5
appVersion: 8.3.1
description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net
icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png
@ -19,4 +19,4 @@ name: grafana
sources:
- https://github.com/grafana/grafana
type: application
version: 6.16.9
version: 6.18.2

View File

@ -59,8 +59,8 @@ This version requires Helm >= 3.1.0.
| `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` |
| `priorityClassName` | Name of Priority Class to assign pods | `nil` |
| `image.repository` | Image repository | `grafana/grafana` |
| `image.tag` | Image tag (`Must be >= 5.0.0`) | `8.0.3` |
| `image.sha` | Image sha (optional) | `80c6d6ac633ba5ab3f722976fb1d9a138f87ca6a9934fcd26a5fc28cbde7dbfa` |
| `image.tag` | Image tag (`Must be >= 5.0.0`) | `8.2.5` |
| `image.sha` | Image sha (optional) | `2acf04c016c77ca2e89af3536367ce847ee326effb933121881c7c89781051d3` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `image.pullSecrets` | Image pull secrets | `{}` |
| `service.enabled` | Enable grafana service | `true` |
@ -75,6 +75,7 @@ This version requires Helm >= 3.1.0.
| `service.loadBalancerIP` | IP address to assign to load balancer (if supported) | `nil` |
| `service.loadBalancerSourceRanges` | list of IP CIDRs allowed access to lb (if supported) | `[]` |
| `service.externalIPs` | service external IP addresses | `[]` |
| `headlessService` | Create a headless service | `false` |
| `extraExposePorts` | Additional service ports for sidecar containers| `[]` |
| `hostAliases` | adds rules to the pod's /etc/hosts | `[]` |
| `ingress.enabled` | Enables Ingress | `false` |
@ -141,7 +142,8 @@ This version requires Helm >= 3.1.0.
| `sidecar.image.sha` | Sidecar image sha (optional) | `""` |
| `sidecar.imagePullPolicy` | Sidecar image pull policy | `IfNotPresent` |
| `sidecar.resources` | Sidecar resources | `{}` |
| `sidecar.enableUniqueFilenames` | Sets the kiwigrid/k8s-sidecar UNIQUE_FILENAMES environment variable | `false` |
| `sidecar.securityContext` | Sidecar securityContext | `{}` |
| `sidecar.enableUniqueFilenames` | Sets the kiwigrid/k8s-sidecar UNIQUE_FILENAMES environment variable. If set to `true` the sidecar will create unique filenames where duplicate data keys exist between ConfigMaps and/or Secrets within the same or multiple Namespaces. | `false` |
| `sidecar.dashboards.enabled` | Enables the cluster wide search for dashboards and adds/updates/deletes them in grafana | `false` |
| `sidecar.dashboards.SCProvider` | Enables creation of sidecar provider | `true` |
| `sidecar.dashboards.provider.name` | Unique name of the grafana provider | `sidecarProvider` |
@ -158,16 +160,18 @@ This version requires Helm >= 3.1.0.
| `sidecar.dashboards.folder` | Folder in the pod that should hold the collected dashboards (unless `sidecar.dashboards.defaultFolderName` is set). This path will be mounted. | `/tmp/dashboards` |
| `sidecar.dashboards.folderAnnotation` | The annotation the sidecar will look for in configmaps to override the destination folder for files | `nil` |
| `sidecar.dashboards.defaultFolderName` | The default folder name, it will create a subfolder under the `sidecar.dashboards.folder` and put dashboards in there instead | `nil` |
| `sidecar.dashboards.searchNamespace` | If specified, the sidecar will search for dashboard config-maps inside this namespace. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces | `nil` |
| `sidecar.dashboards.searchNamespace` | Namespaces list. If specified, the sidecar will search for dashboards config-maps inside these namespaces.Otherwise the namespace in which the sidecar is running will be used.It's also possible to specify ALL to search in all namespaces. | `nil` |
| `sidecar.dashboards.script` | Absolute path to shell script to execute after a configmap got reloaded. | `nil` |
| `sidecar.dashboards.resource` | Should the sidecar looks into secrets, configmaps or both. | `both` |
| `sidecar.dashboards.extraMounts` | Additional dashboard sidecar volume mounts. | `[]` |
| `sidecar.datasources.enabled` | Enables the cluster wide search for datasources and adds/updates/deletes them in grafana |`false` |
| `sidecar.datasources.label` | Label that config maps with datasources should have to be added | `grafana_datasource` |
| `sidecar.datasources.labelValue` | Label value that config maps with datasources should have to be added | `nil` |
| `sidecar.datasources.searchNamespace` | If specified, the sidecar will search for datasources config-maps inside this namespace. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces | `nil` |
| `sidecar.datasources.searchNamespace` | Namespaces list. If specified, the sidecar will search for datasources config-maps inside these namespaces.Otherwise the namespace in which the sidecar is running will be used.It's also possible to specify ALL to search in all namespaces. | `nil` |
| `sidecar.datasources.resource` | Should the sidecar looks into secrets, configmaps or both. | `both` |
| `sidecar.notifiers.enabled` | Enables the cluster wide search for notifiers and adds/updates/deletes them in grafana | `false` |
| `sidecar.notifiers.label` | Label that config maps with notifiers should have to be added | `grafana_notifier` |
| `sidecar.notifiers.searchNamespace` | If specified, the sidecar will search for notifiers config-maps (or secrets) inside this namespace. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces | `nil` |
| `sidecar.notifiers.searchNamespace` | Namespaces list. If specified, the sidecar will search for notifiers config-maps (or secrets) inside these namespaces.Otherwise the namespace in which the sidecar is running will be used.It's also possible to specify ALL to search in all namespaces. | `nil` |
| `sidecar.notifiers.resource` | Should the sidecar looks into secrets, configmaps or both. | `both` |
| `smtp.existingSecret` | The name of an existing secret containing the SMTP credentials. | `""` |
| `smtp.userKey` | The key in the existing SMTP secret containing the username. | `"user"` |
@ -190,7 +194,7 @@ This version requires Helm >= 3.1.0.
| `command` | Define command to be executed by grafana container at startup | `nil` |
| `testFramework.enabled` | Whether to create test-related resources | `true` |
| `testFramework.image` | `test-framework` image repository. | `bats/bats` |
| `testFramework.tag` | `test-framework` image tag. | `v1.1.0` |
| `testFramework.tag` | `test-framework` image tag. | `v1.4.1` |
| `testFramework.imagePullPolicy` | `test-framework` image pull policy. | `IfNotPresent` |
| `testFramework.securityContext` | `test-framework` securityContext | `{}` |
| `downloadDashboards.env` | Environment variables to be passed to the `download-dashboards` container | `{}` |
@ -230,6 +234,11 @@ This version requires Helm >= 3.1.0.
| `imageRenderer.networkPolicy.limitIngress` | Enable a NetworkPolicy to limit inbound traffic from only the created grafana pods | `true` |
| `imageRenderer.networkPolicy.limitEgress` | Enable a NetworkPolicy to limit outbound traffic to only the created grafana pods | `false` |
| `imageRenderer.resources` | Set resource limits for image-renderer pdos | `{}` |
| `networkPolicy.enabled` | Enable creation of NetworkPolicy resources. | `false` |
| `networkPolicy.allowExternal` | Don't require client label for connections | `true` |
| `networkPolicy.explicitNamespacesSelector` | A Kubernetes LabelSelector to explicitly select namespaces from which traffic could be allowed | `{}` |
### Example ingress with path
@ -526,3 +535,23 @@ imageRenderer:
### Image Renderer NetworkPolicy
By default the image-renderer pods will have a network policy which only allows ingress traffic from the created grafana instance
### High Availability for unified alerting
If you want to run Grafana in a high availability cluster you need to enable
the headless service by setting `headlessService: true` in your `values.yaml`
file.
As next step you have to setup the `grafana.ini` in your `values.yaml` in a way
that it will make use of the headless service to obtain all the IPs of the
cluster. You should replace ``{{ Name }}`` with the name of your helm deployment.
```yaml
grafana.ini:
...
unified_alerting:
enabled: true
ha_peers: {{ Name }}-headless:9094
alerting:
enabled: false
```

View File

@ -87,7 +87,7 @@ initContainers:
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
- name: METHOD
value: LIST
value: {{ .Values.sidecar.datasources.watchMethod }}
- name: LABEL
value: "{{ .Values.sidecar.datasources.label }}"
{{- if .Values.sidecar.datasources.labelValue }}
@ -104,7 +104,7 @@ initContainers:
{{- end }}
{{- if .Values.sidecar.datasources.searchNamespace }}
- name: NAMESPACE
value: "{{ .Values.sidecar.datasources.searchNamespace }}"
value: "{{ .Values.sidecar.datasources.searchNamespace | join "," }}"
{{- end }}
{{- if .Values.sidecar.skipTlsVerify }}
- name: SKIP_TLS_VERIFY
@ -112,6 +112,10 @@ initContainers:
{{- end }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-datasources-volume
mountPath: "/etc/grafana/provisioning/datasources"
@ -139,7 +143,7 @@ initContainers:
{{- end }}
{{- if .Values.sidecar.notifiers.searchNamespace }}
- name: NAMESPACE
value: "{{ .Values.sidecar.notifiers.searchNamespace }}"
value: "{{ .Values.sidecar.notifiers.searchNamespace | join "," }}"
{{- end }}
{{- if .Values.sidecar.skipTlsVerify }}
- name: SKIP_TLS_VERIFY
@ -147,6 +151,10 @@ initContainers:
{{- end }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-notifiers-volume
mountPath: "/etc/grafana/provisioning/notifiers"
@ -189,7 +197,7 @@ containers:
{{- end }}
{{- if .Values.sidecar.dashboards.searchNamespace }}
- name: NAMESPACE
value: "{{ .Values.sidecar.dashboards.searchNamespace }}"
value: "{{ .Values.sidecar.dashboards.searchNamespace | join "," }}"
{{- end }}
{{- if .Values.sidecar.skipTlsVerify }}
- name: SKIP_TLS_VERIFY
@ -199,16 +207,22 @@ containers:
- name: FOLDER_ANNOTATION
value: "{{ .Values.sidecar.dashboards.folderAnnotation }}"
{{- end }}
{{- if .Values.sidecar.dashboards.script }}
- name: SCRIPT
value: /opt/script.sh
value: "{{ .Values.sidecar.dashboards.script }}"
{{- end }}
resources:
{{ toYaml .Values.sidecar.resources | indent 6 }}
{{- if .Values.sidecar.securityContext }}
securityContext:
{{- toYaml .Values.sidecar.securityContext | nindent 6 }}
{{- end }}
volumeMounts:
- name: sc-dashboard-volume
mountPath: {{ .Values.sidecar.dashboards.folder | quote }}
- name: script-volume
mountPath: /opt/script.sh
subPath: script.sh
{{- if .Values.sidecar.dashboards.extraMounts }}
{{- toYaml .Values.sidecar.dashboards.extraMounts | trim | nindent 6}}
{{- end }}
{{- end}}
- name: {{ .Chart.Name }}
{{- if .Values.image.sha }}

View File

@ -9,6 +9,9 @@ metadata:
labels:
{{- include "grafana.labels" $ | nindent 4 }}
dashboard-provider: {{ $provider }}
{{- if $.Values.sidecar.dashboards.label }}
{{ $.Values.sidecar.dashboards.label }}: "1"
{{- end }}
{{- if $dashboards }}
data:
{{- $dashboardFound := false }}

View File

@ -1,4 +1,4 @@
{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) (eq .Values.persistence.type "statefulset")}}
{{- if or .Values.headlessService (and .Values.persistence.enabled (not .Values.persistence.existingClaim) (eq .Values.persistence.type "statefulset"))}}
apiVersion: v1
kind: Service
metadata:
@ -15,4 +15,8 @@ spec:
selector:
{{- include "grafana.selectorLabels" . | nindent 4 }}
type: ClusterIP
ports:
- protocol: TCP
port: 3000
targetPort: 3000
{{- end }}

View File

@ -78,6 +78,10 @@ spec:
- name: {{ .Values.imageRenderer.service.portName }}
containerPort: {{ .Values.imageRenderer.service.port }}
protocol: TCP
livenessProbe:
httpGet:
path: /
port: {{ .Values.imageRenderer.service.portName }}
env:
- name: HTTP_PORT
value: {{ .Values.imageRenderer.service.port | quote }}

View File

@ -0,0 +1,37 @@
{{- if .Values.networkPolicy.enabled }}
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: {{ template "grafana.fullname" . }}
namespace: {{ template "grafana.namespace" . }}
labels:
{{- include "grafana.labels" . | nindent 4 }}
{{- if .Values.labels }}
{{ toYaml .Values.labels | indent 4 }}
{{- end }}
{{- with .Values.annotations }}
annotations:
{{ toYaml . | indent 4 }}
{{- end }}
spec:
podSelector:
matchLabels:
{{- include "grafana.selectorLabels" . | nindent 6 }}
ingress:
- ports:
- port: {{ .Values.service.targetPort }}
{{- if not .Values.networkPolicy.allowExternal }}
from:
- podSelector:
matchLabels:
{{ template "grafana.fullname" . }}-client: "true"
{{- if .Values.networkPolicy.explicitNamespacesSelector }}
namespaceSelector:
{{ toYaml .Values.networkPolicy.explicitNamespacesSelector | indent 12 }}
{{- end }}
- podSelector:
matchLabels:
{{- include "grafana.labels" . | nindent 14 }}
role: read
{{- end }}
{{- end }}

View File

@ -23,6 +23,9 @@ serviceAccount:
replicas: 1
## Create a headless service for the deployment
headlessService: false
## Create HorizontalPodAutoscaler object for deployment type
#
autoscaling:
@ -70,7 +73,7 @@ livenessProbe:
image:
repository: grafana/grafana
tag: 8.1.5
tag: 8.3.1
sha: ""
pullPolicy: IfNotPresent
@ -84,7 +87,7 @@ image:
testFramework:
enabled: true
image: "bats/bats"
tag: "v1.1.0"
tag: "v1.4.1"
imagePullPolicy: IfNotPresent
securityContext: {}
@ -615,7 +618,7 @@ smtp:
sidecar:
image:
repository: quay.io/kiwigrid/k8s-sidecar
tag: 1.12.3
tag: 1.14.2
sha: ""
imagePullPolicy: IfNotPresent
resources: {}
@ -625,6 +628,7 @@ sidecar:
# requests:
# cpu: 50m
# memory: 50Mi
securityContext: {}
# skipTlsVerify Set to true to skip tls verification for kube api calls
# skipTlsVerify: true
enableUniqueFilenames: false
@ -639,15 +643,19 @@ sidecar:
folder: /tmp/dashboards
# The default folder name, it will create a subfolder under the `folder` and put dashboards in there instead
defaultFolderName: null
# If specified, the sidecar will search for dashboard config-maps inside this namespace.
# Namespaces list. If specified, the sidecar will search for config-maps/secrets inside these namespaces.
# Otherwise the namespace in which the sidecar is running will be used.
# It's also possible to specify ALL to search in all namespaces
# It's also possible to specify ALL to search in all namespaces.
searchNamespace: null
# Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds.
watchMethod: WATCH
# search in configmap, secret or both
resource: both
# If specified, the sidecar will look for annotation with this name to create folder and put graph here.
# You can use this parameter together with `provider.foldersFromFilesStructure`to annotate configmaps and create folder structure.
folderAnnotation: null
# Absolute path to shell script to execute after a configmap got reloaded
script: null
# provider configuration that lets grafana manage the dashboards
provider:
# name of the provider, should be unique
@ -664,6 +672,8 @@ sidecar:
allowUiUpdates: false
# allow Grafana to replicate dashboard structure from filesystem
foldersFromFilesStructure: false
# Additional dashboard sidecar volume mounts
extraMounts: []
datasources:
enabled: false
# label that the configmaps with datasources are marked with
@ -674,6 +684,8 @@ sidecar:
# Otherwise the namespace in which the sidecar is running will be used.
# It's also possible to specify ALL to search in all namespaces
searchNamespace: null
# Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds.
watchMethod: LIST
# search in configmap, secret or both
resource: both
notifiers:
@ -748,3 +760,29 @@ imageRenderer:
# requests:
# cpu: 50m
# memory: 50Mi
networkPolicy:
## @param networkPolicy.enabled Enable creation of NetworkPolicy resources. Only Ingress traffic is filtered for now.
##
enabled: false
## @param networkPolicy.allowExternal Don't require client label for connections
## The Policy model to apply. When set to false, only pods with the correct
## client label will have network access to grafana port defined.
## When true, grafana will accept connections from any source
## (with the correct destination port).
##
allowExternal: true
## @param networkPolicy.explicitNamespacesSelector A Kubernetes LabelSelector to explicitly select namespaces from which traffic could be allowed
## If explicitNamespacesSelector is missing or set to {}, only client Pods that are in the networkPolicy's namespace
## and that match other criteria, the ones that have the good label, can reach the grafana.
## But sometimes, we want the grafana to be accessible to clients from other namespaces, in this case, we can use this
## LabelSelector to select these namespaces, note that the networkPolicy's namespace should also be explicitly added.
##
## Example:
## explicitNamespacesSelector:
## matchLabels:
## role: frontend
## matchExpressions:
## - {key: role, operator: In, values: [frontend]}
##
explicitNamespacesSelector: {}

View File

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.1.1
appVersion: 2.2.4
description: Install kube-state-metrics to generate and expose cluster-level metrics
home: https://github.com/kubernetes/kube-state-metrics/
keywords:
@ -16,4 +16,4 @@ name: kube-state-metrics
sources:
- https://github.com/kubernetes/kube-state-metrics/
type: application
version: 3.4.2
version: 4.1.1

View File

@ -45,3 +45,35 @@ Allow the release namespace to be overridden for multi-namespace deployments in
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "kube-state-metrics.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Generate basic labels
*/}}
{{- define "kube-state-metrics.labels" }}
helm.sh/chart: {{ include "kube-state-metrics.chart" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/component: metrics
app.kubernetes.io/part-of: {{ template "kube-state-metrics.name" . }}
{{- include "kube-state-metrics.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
{{- if .Values.customLabels }}
{{ toYaml .Values.customLabels }}
{{- end }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "kube-state-metrics.selectorLabels" }}
app.kubernetes.io/name: {{ include "kube-state-metrics.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

View File

@ -3,10 +3,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
name: {{ template "kube-state-metrics.fullname" . }}
roleRef:
apiGroup: rbac.authorization.k8s.io

View File

@ -8,18 +8,11 @@ metadata:
name: {{ template "kube-state-metrics.fullname" . }}
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: "{{ .Release.Service }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
{{- if .Values.customLabels }}
{{ toYaml .Values.customLabels | indent 4 }}
{{- end }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
{{- include "kube-state-metrics.selectorLabels" . | indent 6 }}
replicas: {{ .Values.replicas }}
{{- if .Values.autosharding.enabled }}
serviceName: {{ template "kube-state-metrics.fullname" . }}
@ -28,11 +21,7 @@ spec:
template:
metadata:
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}"
{{- if .Values.customLabels }}
{{ toYaml .Values.customLabels | indent 8 }}
{{- end }}
{{- include "kube-state-metrics.labels" . | indent 8 }}
{{- if .Values.podAnnotations }}
annotations:
{{ toYaml .Values.podAnnotations | indent 8 }}
@ -77,6 +66,9 @@ spec:
{{- if .Values.metricLabelsAllowlist }}
- --metric-labels-allowlist={{ .Values.metricLabelsAllowlist | join "," }}
{{- end }}
{{- if .Values.metricAnnotationsAllowList }}
- --metric-annotations-allowlist={{ .Values.metricAnnotationsAllowList | join "," }}
{{- end }}
{{- if .Values.metricAllowlist }}
- --metric-allowlist={{ .Values.metricAllowlist | join "," }}
{{- end }}
@ -107,8 +99,10 @@ spec:
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
ports:
- containerPort: {{ .Values.service.port | default 8080}}
name: "http"
{{- if .Values.selfMonitor.enabled }}
- containerPort: {{ .Values.selfMonitor.telemetryPort | default 8081 }}
name: "metrics"
{{- end }}
livenessProbe:
httpGet:

View File

@ -5,10 +5,7 @@ metadata:
name: {{ template "kube-state-metrics.fullname" . }}-kubeconfig
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: "{{ .Release.Service }}"
{{- include "kube-state-metrics.labels" . | indent 4 }}
type: Opaque
data:
config: '{{ .Values.kubeconfig.secret }}'

View File

@ -5,13 +5,7 @@ metadata:
name: {{ template "kube-state-metrics.fullname" . }}
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: "{{ .Release.Service }}"
{{- if .Values.customLabels }}
{{ toYaml .Values.customLabels | indent 4 }}
{{- end }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
spec:
selector:
matchLabels:

View File

@ -4,10 +4,7 @@ kind: PodSecurityPolicy
metadata:
name: {{ template "kube-state-metrics.fullname" . }}
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
{{- if .Values.podSecurityPolicy.annotations }}
annotations:
{{ toYaml .Values.podSecurityPolicy.annotations | indent 4 }}

View File

@ -3,10 +3,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
name: psp-{{ template "kube-state-metrics.fullname" . }}
rules:
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}

View File

@ -3,10 +3,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
name: psp-{{ template "kube-state-metrics.fullname" . }}
roleRef:
apiGroup: rbac.authorization.k8s.io

View File

@ -10,9 +10,15 @@ kind: ClusterRole
metadata:
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" $ }}
helm.sh/chart: {{ $.Chart.Name }}-{{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/component: metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: "{{ $.Chart.Version }}"
app.kubernetes.io/part-of: {{ template "kube-state-metrics.name" $ }}
helm.sh/chart: {{ $.Chart.Name }}-{{ $.Chart.Version }}
{{- if $.Values.customLabels }}
{{ toYaml $.Values.customLabels | nindent 4 }}
{{- end }}
name: {{ template "kube-state-metrics.fullname" $ }}
{{- if eq $.Values.rbac.useClusterRole false }}
namespace: {{ . }}

View File

@ -5,10 +5,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" $ }}
helm.sh/chart: {{ $.Chart.Name }}-{{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/instance: {{ $.Release.Name }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
name: {{ template "kube-state-metrics.fullname" $ }}
namespace: {{ . }}
roleRef:

View File

@ -4,13 +4,7 @@ metadata:
name: {{ template "kube-state-metrics.fullname" . }}
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: "{{ .Release.Service }}"
{{- if .Values.customLabels }}
{{ toYaml .Values.customLabels | indent 4 }}
{{- end }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
annotations:
{{- if .Values.prometheusScrape }}
prometheus.io/scrape: '{{ .Values.prometheusScrape }}'
@ -38,5 +32,4 @@ spec:
loadBalancerIP: "{{ .Values.service.loadBalancerIP }}"
{{- end }}
selector:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.selectorLabels" . | indent 4 }}

View File

@ -3,10 +3,7 @@ apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
name: {{ template "kube-state-metrics.serviceAccountName" . }}
namespace: {{ template "kube-state-metrics.namespace" . }}
{{- if .Values.serviceAccount.annotations }}

View File

@ -5,30 +5,58 @@ metadata:
name: {{ template "kube-state-metrics.fullname" . }}
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: "{{ .Release.Service }}"
{{- if .Values.prometheus.monitor.additionalLabels }}
{{ toYaml .Values.prometheus.monitor.additionalLabels | indent 4 }}
{{- end }}
{{- if .Values.customLabels }}
{{ toYaml .Values.customLabels | indent 4 }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
{{- with .Values.prometheus.monitor.additionalLabels }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.selectorLabels" . | indent 6 }}
endpoints:
- port: http
{{- if .Values.prometheus.monitor.interval }}
interval: {{ .Values.prometheus.monitor.interval }}
{{- end }}
{{- if .Values.prometheus.monitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }}
{{- end }}
{{- if .Values.prometheus.monitor.proxyUrl }}
proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}}
{{- end }}
{{- if .Values.prometheus.monitor.honorLabels }}
honorLabels: true
{{- end }}
{{ if .Values.selfMonitor.enabled }}
{{- if .Values.prometheus.monitor.metricRelabelings }}
metricRelabelings:
{{- toYaml .Values.prometheus.monitor.metricRelabelings | nindent 8 }}
{{- end }}
{{- if .Values.prometheus.monitor.relabelings }}
relabelings:
{{- toYaml .Values.prometheus.monitor.relabelings | nindent 8 }}
{{- end }}
{{- if .Values.selfMonitor.enabled }}
- port: metrics
{{- if .Values.prometheus.monitor.interval }}
interval: {{ .Values.prometheus.monitor.interval }}
{{- end }}
{{- if .Values.prometheus.monitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }}
{{- end }}
{{- if .Values.prometheus.monitor.proxyUrl }}
proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}}
{{- end }}
{{- if .Values.prometheus.monitor.honorLabels }}
honorLabels: true
{{- end }}
{{ end }}
{{- if .Values.prometheus.monitor.metricRelabelings }}
metricRelabelings:
{{- toYaml .Values.prometheus.monitor.metricRelabelings | nindent 8 }}
{{- end }}
{{- if .Values.prometheus.monitor.relabelings }}
relabelings:
{{- toYaml .Values.prometheus.monitor.relabelings | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -5,10 +5,7 @@ metadata:
name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }}
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
rules:
- apiGroups:
- ""

View File

@ -5,10 +5,7 @@ metadata:
name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }}
namespace: {{ template "kube-state-metrics.namespace" . }}
labels:
app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "kube-state-metrics.labels" . | indent 4 }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role

View File

@ -2,7 +2,7 @@
prometheusScrape: true
image:
repository: k8s.gcr.io/kube-state-metrics/kube-state-metrics
tag: v2.1.1
tag: v2.2.4
pullPolicy: IfNotPresent
imagePullSecrets: []
@ -30,7 +30,9 @@ service:
loadBalancerIP: ""
annotations: {}
## Additional labels to add to all resources
customLabels: {}
# app: kube-state-metrics
hostNetwork: false
@ -63,7 +65,12 @@ prometheus:
enabled: false
additionalLabels: {}
namespace: ""
interval: ""
scrapeTimeout: ""
proxyUrl: ""
honorLabels: false
metricRelabelings: []
relabelings: []
## Specify if a Pod Security Policy for kube-state-metrics must be created
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
@ -133,6 +140,15 @@ metricDenylist: []
metricLabelsAllowlist: []
# - namespaces=[k8s-label-1,k8s-label-n]
# Comma-separated list of Kubernetes annotations keys that will be used in the resource'
# labels metric. By default the metric contains only name and namespace labels.
# To include additional annotations provide a list of resource names in their plural form and Kubernetes
# annotation keys you would like to allow for them (Example: '=namespaces=[kubernetes.io/team,...],pods=[kubernetes.io/team],...)'.
# A single '*' can be provided per resource instead to allow any annotations, but that has
# severe performance implications (Example: '=pods=[*]').
metricAnnotationsAllowList: []
# - pods=[k8s-annotation-1,k8s-annotation-n]
# Available collectors for kube-state-metrics.
# By default, all available resources are enabled, comment out to disable.
collectors:

View File

@ -9,10 +9,9 @@ keywords:
maintainers:
- email: gianrubio@gmail.com
name: gianrubio
- name: vsliouniaev
- name: bismarck
name: prometheus-node-exporter
sources:
- https://github.com/prometheus/node_exporter/
type: application
version: 2.0.4
version: 2.2.2

View File

@ -64,3 +64,18 @@ Allow the release namespace to be overridden for multi-namespace deployments in
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}
{{/*
Create the namespace name of the service monitor
*/}}
{{- define "prometheus-node-exporter.monitor-namespace" -}}
{{- if .Values.namespaceOverride -}}
{{- .Values.namespaceOverride -}}
{{- else -}}
{{- if .Values.prometheus.monitor.namespace -}}
{{- .Values.prometheus.monitor.namespace -}}
{{- else -}}
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}
{{- end -}}

View File

@ -21,7 +21,7 @@ spec:
{{- toYaml .Values.podAnnotations | nindent 8 }}
{{- end }}
spec:
automountServiceAccountToken: false
automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }}
serviceAccountName: {{ template "prometheus-node-exporter.serviceAccountName" . }}
{{- if .Values.securityContext }}
securityContext:
@ -62,7 +62,7 @@ spec:
fieldPath: status.hostIP
{{- end }}
ports:
- name: metrics
- name: {{ .Values.service.portName }}
containerPort: {{ .Values.service.port }}
protocol: TCP
livenessProbe:

View File

@ -12,7 +12,7 @@ subsets:
- ip: {{ . }}
{{- end }}
ports:
- name: metrics
- name: {{ .Values.service.portName }}
port: 9100
protocol: TCP
{{- end }}

View File

@ -3,7 +3,7 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ template "prometheus-node-exporter.fullname" . }}
namespace: {{ template "prometheus-node-exporter.namespace" . }}
namespace: {{ template "prometheus-node-exporter.monitor-namespace" . }}
labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }}
{{- if .Values.prometheus.monitor.additionalLabels }}
{{ toYaml .Values.prometheus.monitor.additionalLabels | indent 4 }}
@ -14,7 +14,7 @@ spec:
app: {{ template "prometheus-node-exporter.name" . }}
release: {{ .Release.Name }}
endpoints:
- port: metrics
- port: {{ .Values.service.portName }}
scheme: {{ $.Values.prometheus.monitor.scheme }}
{{- if $.Values.prometheus.monitor.bearerTokenFile }}
bearerTokenFile: {{ $.Values.prometheus.monitor.bearerTokenFile }}

View File

@ -17,7 +17,7 @@ spec:
{{- end }}
targetPort: {{ .Values.service.targetPort }}
protocol: TCP
name: metrics
name: {{ .Values.service.portName }}
selector:
app: {{ template "prometheus-node-exporter.name" . }}
release: {{ .Release.Name }}

View File

@ -11,6 +11,7 @@ service:
port: 9100
targetPort: 9100
nodePort:
portName: metrics
listenOnAllInterfaces: true
annotations:
prometheus.io/scrape: "true"
@ -57,6 +58,7 @@ serviceAccount:
name:
annotations: {}
imagePullSecrets: []
automountServiceAccountToken: false
securityContext:
fsGroup: 65534

View File

@ -1,11 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
controller-gen.kubebuilder.io/version: v0.6.2
creationTimestamp: null
name: alertmanagerconfigs.monitoring.coreos.com
spec:
@ -198,7 +198,8 @@ spec:
description: Whether or not to notify about resolved alerts.
type: boolean
smarthost:
description: The SMTP host through which emails are sent.
description: The SMTP host and port through which emails
are sent. E.g. example.com:25
type: string
text:
description: The text body of the email notification.
@ -391,8 +392,42 @@ spec:
httpConfig:
description: HTTP client configuration.
properties:
authorization:
description: Authorization header configuration for
the client. This is mutually exclusive with BasicAuth
and is only available starting from Alertmanager
v0.22+.
properties:
credentials:
description: The secret's key that contains the
credentials of the request
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client.
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined,
BasicAuth takes precedence.
properties:
password:
description: The secret in the service monitor
@ -683,8 +718,42 @@ spec:
httpConfig:
description: HTTP client configuration.
properties:
authorization:
description: Authorization header configuration for
the client. This is mutually exclusive with BasicAuth
and is only available starting from Alertmanager
v0.22+.
properties:
credentials:
description: The secret's key that contains the
credentials of the request
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client.
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined,
BasicAuth takes precedence.
properties:
password:
description: The secret in the service monitor
@ -959,8 +1028,42 @@ spec:
httpConfig:
description: HTTP client configuration.
properties:
authorization:
description: Authorization header configuration for
the client. This is mutually exclusive with BasicAuth
and is only available starting from Alertmanager
v0.22+.
properties:
credentials:
description: The secret's key that contains the
credentials of the request
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client.
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined,
BasicAuth takes precedence.
properties:
password:
description: The secret in the service monitor
@ -1350,8 +1453,42 @@ spec:
httpConfig:
description: HTTP client configuration.
properties:
authorization:
description: Authorization header configuration for
the client. This is mutually exclusive with BasicAuth
and is only available starting from Alertmanager
v0.22+.
properties:
credentials:
description: The secret's key that contains the
credentials of the request
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client.
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined,
BasicAuth takes precedence.
properties:
password:
description: The secret in the service monitor
@ -1635,8 +1772,42 @@ spec:
httpConfig:
description: The HTTP client's configuration.
properties:
authorization:
description: Authorization header configuration for
the client. This is mutually exclusive with BasicAuth
and is only available starting from Alertmanager
v0.22+.
properties:
credentials:
description: The secret's key that contains the
credentials of the request
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client.
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined,
BasicAuth takes precedence.
properties:
password:
description: The secret in the service monitor
@ -1864,8 +2035,42 @@ spec:
httpConfig:
description: HTTP client configuration.
properties:
authorization:
description: Authorization header configuration for
the client. This is mutually exclusive with BasicAuth
and is only available starting from Alertmanager
v0.22+.
properties:
credentials:
description: The secret's key that contains the
credentials of the request
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client.
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined,
BasicAuth takes precedence.
properties:
password:
description: The secret in the service monitor
@ -2143,8 +2348,42 @@ spec:
httpConfig:
description: HTTP client configuration.
properties:
authorization:
description: Authorization header configuration for
the client. This is mutually exclusive with BasicAuth
and is only available starting from Alertmanager
v0.22+.
properties:
credentials:
description: The secret's key that contains the
credentials of the request
properties:
key:
description: The key of the secret to select
from. Must be a valid secret key.
type: string
name:
description: 'Name of the referent. More info:
https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion,
kind, uid?'
type: string
optional:
description: Specify whether the Secret or
its key must be defined
type: boolean
required:
- key
type: object
type:
description: Set the authentication type. Defaults
to Bearer, Basic will cause an error
type: string
type: object
basicAuth:
description: BasicAuth for the client.
description: BasicAuth for the client. This is mutually
exclusive with Authorization. If both are defined,
BasicAuth takes precedence.
properties:
password:
description: The secret in the service monitor

View File

@ -1,11 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
controller-gen.kubebuilder.io/version: v0.6.2
creationTimestamp: null
name: podmonitors.monitoring.coreos.com
spec:

View File

@ -1,11 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
controller-gen.kubebuilder.io/version: v0.6.2
creationTimestamp: null
name: probes.monitoring.coreos.com
spec:
@ -153,6 +153,50 @@ spec:
and newer.
format: int64
type: integer
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the label
set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section
of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching. Default
is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex capture
groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source label
values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing labels.
Their content is concatenated using the configured separator
and matched against the configured regular expression for
the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written in
a replace action. It is mandatory for replace actions. Regex
capture groups are available.
type: string
type: object
type: array
module:
description: 'The module to use for probing specifying how to probe
the target. Example module configuring in the blackbox exporter:

View File

@ -1,11 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
controller-gen.kubebuilder.io/version: v0.6.2
creationTimestamp: null
name: prometheusrules.monitoring.coreos.com
spec:

View File

@ -1,11 +1,11 @@
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
controller-gen.kubebuilder.io/version: v0.6.2
creationTimestamp: null
name: servicemonitors.monitoring.coreos.com
spec:

View File

@ -96,6 +96,39 @@ Allow the release namespace to be overridden for multi-namespace deployments in
{{- end -}}
{{- end -}}
{{/*
Use the grafana namespace override for multi-namespace deployments in combined charts
*/}}
{{- define "kube-prometheus-stack-grafana.namespace" -}}
{{- if .Values.grafana.namespaceOverride -}}
{{- .Values.grafana.namespaceOverride -}}
{{- else -}}
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}
{{/*
Use the kube-state-metrics namespace override for multi-namespace deployments in combined charts
*/}}
{{- define "kube-prometheus-stack-kube-state-metrics.namespace" -}}
{{- if index .Values "kube-state-metrics" "namespaceOverride" -}}
{{- index .Values "kube-state-metrics" "namespaceOverride" -}}
{{- else -}}
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}
{{/*
Use the prometheus-node-exporter namespace override for multi-namespace deployments in combined charts
*/}}
{{- define "kube-prometheus-stack-prometheus-node-exporter.namespace" -}}
{{- if index .Values "prometheus-node-exporter" "namespaceOverride" -}}
{{- index .Values "prometheus-node-exporter" "namespaceOverride" -}}
{{- else -}}
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}
{{/* Allow KubeVersion to be overridden. */}}
{{- define "kube-prometheus-stack.kubeVersion" -}}
{{- default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}}

View File

@ -81,6 +81,7 @@ spec:
{{- end }}
{{- if or .Values.alertmanager.alertmanagerSpec.podAntiAffinity .Values.alertmanager.alertmanagerSpec.affinity }}
affinity:
{{- end }}
{{- if .Values.alertmanager.alertmanagerSpec.affinity }}
{{ toYaml .Values.alertmanager.alertmanagerSpec.affinity | indent 4 }}
{{- end }}
@ -103,7 +104,6 @@ spec:
- {key: app, operator: In, values: [alertmanager]}
- {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]}
{{- end }}
{{- end }}
{{- if .Values.alertmanager.alertmanagerSpec.tolerations }}
tolerations:
{{ toYaml .Values.alertmanager.alertmanagerSpec.tolerations | indent 4 }}
@ -140,10 +140,10 @@ spec:
{{ toYaml .Values.alertmanager.alertmanagerSpec.volumeMounts | indent 4 }}
{{- end }}
portName: {{ .Values.alertmanager.alertmanagerSpec.portName }}
{{- end }}
{{- if .Values.alertmanager.alertmanagerSpec.clusterAdvertiseAddress }}
clusterAdvertiseAddress: {{ .Values.alertmanager.alertmanagerSpec.clusterAdvertiseAddress }}
{{- end }}
{{- if .Values.alertmanager.alertmanagerSpec.forceEnableClusterMode }}
forceEnableClusterMode: {{ .Values.alertmanager.alertmanagerSpec.forceEnableClusterMode }}
{{- end }}
{{- end }}

View File

@ -44,7 +44,7 @@ spec:
{{ toYaml .Values.alertmanager.service.additionalPorts | indent 2 }}
{{- end }}
selector:
app: alertmanager
app.kubernetes.io/name: alertmanager
alertmanager: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager
type: "{{ .Values.alertmanager.service.type }}"
{{- end }}

View File

@ -38,7 +38,7 @@ items:
port: {{ $serviceValues.port }}
targetPort: {{ $serviceValues.targetPort }}
selector:
app: alertmanager
app.kubernetes.io/name: alertmanager
alertmanager: {{ template "kube-prometheus-stack.fullname" $ }}-alertmanager
statefulset.kubernetes.io/pod-name: alertmanager-{{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }}
type: "{{ $serviceValues.type }}"

View File

@ -31,6 +31,6 @@ spec:
{{- end }}
{{- if .Values.coreDns.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.coreDns.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.coreDns.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}

View File

@ -24,7 +24,7 @@ spec:
{{- end }}
{{- if .Values.kubeApiServer.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeApiServer.serviceMonitor.relabelings | indent 6 }}
{{ tpl (toYaml .Values.kubeApiServer.serviceMonitor.relabelings | indent 6) . }}
{{- end }}
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

View File

@ -42,6 +42,6 @@ spec:
{{- end }}
{{- if .Values.kubeControllerManager.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeControllerManager.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubeControllerManager.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}

View File

@ -44,6 +44,6 @@ spec:
{{- end }}
{{- if .Values.kubeDns.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeDns.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubeDns.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}

View File

@ -48,6 +48,6 @@ spec:
{{- end }}
{{- if .Values.kubeEtcd.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeEtcd.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubeEtcd.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}

View File

@ -32,10 +32,10 @@ spec:
{{- end}}
{{- if .Values.kubeProxy.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ toYaml .Values.kubeProxy.serviceMonitor.metricRelabelings | indent 4 }}
{{ tpl (toYaml .Values.kubeProxy.serviceMonitor.metricRelabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubeProxy.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeProxy.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubeProxy.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}

View File

@ -42,6 +42,6 @@ spec:
{{- end }}
{{- if .Values.kubeScheduler.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeScheduler.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubeScheduler.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}

View File

@ -27,7 +27,7 @@ spec:
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.selfMonitor.enabled }}
- port: metrics
@ -44,13 +44,13 @@ spec:
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
{{- if ne (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack-kube-state-metrics.namespace" .) }}
namespaceSelector:
matchNames:
- {{ .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
- {{ printf "%s" (include "kube-prometheus-stack-kube-state-metrics.namespace" .) | quote }}
{{- end }}
selector:
matchLabels:

View File

@ -18,6 +18,9 @@ spec:
{{- if .Values.kubelet.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
@ -29,7 +32,7 @@ spec:
{{- end }}
{{- if .Values.kubelet.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubelet.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubelet.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.cAdvisor }}
- port: https-metrics
@ -55,7 +58,7 @@ spec:
{{- end }}
{{- if .Values.kubelet.serviceMonitor.cAdvisorRelabelings }}
relabelings:
{{ toYaml .Values.kubelet.serviceMonitor.cAdvisorRelabelings | indent 4 }}
{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorRelabelings | indent 4) . }}
{{- end }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.probes }}
@ -68,6 +71,9 @@ spec:
{{- if .Values.kubelet.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
@ -79,7 +85,7 @@ spec:
{{- end }}
{{- if .Values.kubelet.serviceMonitor.probesRelabelings }}
relabelings:
{{ toYaml .Values.kubelet.serviceMonitor.probesRelabelings | indent 4 }}
{{ tpl (toYaml .Values.kubelet.serviceMonitor.probesRelabelings | indent 4) . }}
{{- end }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.resource }}
@ -92,6 +98,9 @@ spec:
{{- if .Values.kubelet.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
@ -103,7 +112,7 @@ spec:
{{- end }}
{{- if .Values.kubelet.serviceMonitor.resourceRelabelings }}
relabelings:
{{ toYaml .Values.kubelet.serviceMonitor.resourceRelabelings | indent 4 }}
{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceRelabelings | indent 4) . }}
{{- end }}
{{- end }}
{{- else }}
@ -114,6 +123,9 @@ spec:
{{- if .Values.kubelet.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
{{- if .Values.kubelet.serviceMonitor.metricRelabelings }}
metricRelabelings:
@ -121,7 +133,7 @@ spec:
{{- end }}
{{- if .Values.kubelet.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.kubelet.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.kubelet.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.cAdvisor }}
- port: http-metrics
@ -132,6 +144,9 @@ spec:
{{- if .Values.kubelet.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
{{- if .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings }}
metricRelabelings:
@ -139,7 +154,7 @@ spec:
{{- end }}
{{- if .Values.kubelet.serviceMonitor.cAdvisorRelabelings }}
relabelings:
{{ toYaml .Values.kubelet.serviceMonitor.cAdvisorRelabelings | indent 4 }}
{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorRelabelings | indent 4) . }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.resource }}
- port: http-metrics
@ -150,6 +165,9 @@ spec:
{{- if .Values.kubelet.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }}
{{- end }}
{{- if .Values.kubelet.serviceMonitor.scrapeTimeout }}
scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }}
{{- end }}
honorLabels: true
{{- if .Values.kubelet.serviceMonitor.resourceMetricRelabelings }}
metricRelabelings:
@ -157,7 +175,7 @@ spec:
{{- end }}
{{- if .Values.kubelet.serviceMonitor.resourceRelabelings }}
relabelings:
{{ toYaml .Values.kubelet.serviceMonitor.resourceRelabelings | indent 4 }}
{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceRelabelings | indent 4) . }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -13,13 +13,13 @@ spec:
matchLabels:
app: prometheus-node-exporter
release: {{ $.Release.Name }}
{{- if (index .Values "prometheus-node-exporter" "namespaceOverride") }}
{{- if ne (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack-prometheus-node-exporter.namespace" .) }}
namespaceSelector:
matchNames:
- {{ index .Values "prometheus-node-exporter" "namespaceOverride" }}
- {{ printf "%s" (include "kube-prometheus-stack-prometheus-node-exporter.namespace" .) | quote }}
{{- end }}
endpoints:
- port: metrics
- port: {{ index .Values "prometheus-node-exporter" "service" "portName" }}
{{- if .Values.nodeExporter.serviceMonitor.interval }}
interval: {{ .Values.nodeExporter.serviceMonitor.interval }}
{{- end }}
@ -35,6 +35,6 @@ spec:
{{- end }}
{{- if .Values.nodeExporter.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.nodeExporter.serviceMonitor.relabelings | indent 4 }}
{{ tpl (toYaml .Values.nodeExporter.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}

View File

@ -10,7 +10,7 @@ items:
kind: ConfigMap
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) $dashboardName | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" $ }}
namespace: {{ template "kube-prometheus-stack-grafana.namespace" $ }}
labels:
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"

View File

@ -3,7 +3,7 @@ apiVersion: v1
kind: ConfigMap
metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-grafana-datasource
namespace: {{ template "kube-prometheus-stack.namespace" . }}
namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
{{- if .Values.grafana.sidecar.datasources.annotations }}
annotations:
{{ toYaml .Values.grafana.sidecar.datasources.annotations | indent 4 }}
@ -15,6 +15,10 @@ metadata:
data:
datasource.yaml: |-
apiVersion: 1
{{- if .Values.grafana.deleteDatasources }}
deleteDatasources:
{{ tpl (toYaml .Values.grafana.deleteDatasources | indent 6) . }}
{{- end }}
datasources:
{{- $scrapeInterval := .Values.grafana.sidecar.datasources.defaultDatasourceScrapeInterval | default .Values.prometheus.prometheusSpec.scrapeInterval | default "30s" }}
{{- if .Values.grafana.sidecar.datasources.defaultDatasourceEnabled }}
@ -23,7 +27,7 @@ data:
{{- if .Values.grafana.sidecar.datasources.url }}
url: {{ .Values.grafana.sidecar.datasources.url }}
{{- else }}
url: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus:{{ .Values.prometheus.service.port }}/{{ trimPrefix "/" .Values.prometheus.prometheusSpec.routePrefix }}
url: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.prometheus.service.port }}/{{ trimPrefix "/" .Values.prometheus.prometheusSpec.routePrefix }}
{{- end }}
access: proxy
isDefault: true

View File

@ -12,9 +12,11 @@ spec:
matchLabels:
app.kubernetes.io/name: grafana
app.kubernetes.io/instance: {{ $.Release.Name | quote }}
{{- if ne (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack-grafana.namespace" .) }}
namespaceSelector:
matchNames:
- {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }}
- {{ printf "%s" (include "kube-prometheus-stack-grafana.namespace" .) | quote }}
{{- end }}
endpoints:
- port: {{ .Values.grafana.service.portName }}
{{- if .Values.grafana.serviceMonitor.interval }}

View File

@ -6,12 +6,11 @@ metadata:
annotations:
"helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
labels:
app: {{ template "kube-prometheus-stack.name" . }}-admission
{{- if .Values.global.rbac.pspAnnotations }}
annotations:
{{ toYaml .Values.global.rbac.pspAnnotations | indent 4 }}
{{- end }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}-admission
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
spec:
privileged: false

View File

@ -1,4 +1,5 @@
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
{{- $defaultKubeletSvcName := printf "%s-kubelet" (include "kube-prometheus-stack.fullname" .) }}
{{- if .Values.prometheusOperator.enabled }}
apiVersion: apps/v1
kind: Deployment
@ -40,7 +41,7 @@ spec:
imagePullPolicy: "{{ .Values.prometheusOperator.image.pullPolicy }}"
args:
{{- if .Values.prometheusOperator.kubeletService.enabled }}
- --kubelet-service={{ .Values.prometheusOperator.kubeletService.namespace }}/{{ template "kube-prometheus-stack.fullname" . }}-kubelet
- --kubelet-service={{ .Values.prometheusOperator.kubeletService.namespace }}/{{ default $defaultKubeletSvcName .Values.prometheusOperator.kubeletService.name }}
{{- end }}
{{- if .Values.prometheusOperator.logFormat }}
- --log-format={{ .Values.prometheusOperator.logFormat }}
@ -65,15 +66,15 @@ spec:
{{- if .Values.prometheusOperator.alertmanagerDefaultBaseImage }}
- --alertmanager-default-base-image={{ .Values.prometheusOperator.alertmanagerDefaultBaseImage }}
{{- end }}
{{- if .Values.prometheusOperator.prometheusConfigReloaderImage.sha }}
- --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }}@sha256:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.sha }}
{{- if .Values.prometheusOperator.prometheusConfigReloader.image.sha }}
- --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloader.image.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloader.image.tag }}@sha256:{{ .Values.prometheusOperator.prometheusConfigReloader.image.sha }}
{{- else }}
- --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }}
- --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloader.image.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloader.image.tag }}
{{- end }}
- --config-reloader-cpu-request={{ .Values.prometheusOperator.configReloaderCpu }}
- --config-reloader-cpu-limit={{ .Values.prometheusOperator.configReloaderCpu }}
- --config-reloader-memory-request={{ .Values.prometheusOperator.configReloaderMemory }}
- --config-reloader-memory-limit={{ .Values.prometheusOperator.configReloaderMemory }}
- --config-reloader-cpu-request={{ .Values.prometheusOperator.prometheusConfigReloader.resources.requests.cpu }}
- --config-reloader-cpu-limit={{ .Values.prometheusOperator.prometheusConfigReloader.resources.limits.cpu }}
- --config-reloader-memory-request={{ .Values.prometheusOperator.prometheusConfigReloader.resources.requests.memory }}
- --config-reloader-memory-limit={{ .Values.prometheusOperator.prometheusConfigReloader.resources.limits.memory }}
{{- if .Values.prometheusOperator.alertmanagerInstanceNamespaces }}
- --alertmanager-instance-namespaces={{ .Values.prometheusOperator.alertmanagerInstanceNamespaces | join "," }}
{{- end }}

View File

@ -0,0 +1,12 @@
{{- if .Values.prometheus.prometheusSpec.thanos.secretProviderClass }}
---
apiVersion: secrets-store.csi.x-k8s.io/v1alpha1
kind: SecretProviderClass
metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}-prometheus
spec:
{{ toYaml .Values.prometheus.prometheusSpec.thanos.secretProviderClass | indent 2 }}
{{- end }}

View File

@ -301,7 +301,6 @@ spec:
disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }}
{{- end }}
portName: {{ .Values.prometheus.prometheusSpec.portName }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.volumes }}
volumes:
{{ toYaml .Values.prometheus.prometheusSpec.volumes | indent 4 }}
@ -356,3 +355,4 @@ spec:
{{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
{{- end }}
{{- end }}

View File

@ -366,14 +366,16 @@ alertmanager:
bearerTokenFile:
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -396,7 +398,7 @@ alertmanager:
##
image:
repository: quay.io/prometheus/alertmanager
tag: v0.22.2
tag: v0.23.0
sha: ""
## If true then the user will be responsible to provide a secret with alertmanager configuration
@ -596,7 +598,7 @@ alertmanager:
## PortName to use for Alert Manager.
##
portName: "web"
portName: "http-web"
## ClusterAdvertiseAddress is the explicit address to advertise in cluster. Needs to be provided for non RFC1918 [1] (public) addresses. [1] RFC1918: https://tools.ietf.org/html/rfc1918
##
@ -718,6 +720,10 @@ grafana:
# configMap: certs-configmap
# readOnly: true
deleteDatasources: []
# - name: example-datasource
# orgId: 1
## Configure additional grafana datasources (passed through tpl)
## ref: http://docs.grafana.org/administration/provisioning/#datasources
additionalDataSources: []
@ -737,7 +743,7 @@ grafana:
## Passed to grafana subchart and used by servicemonitor below
##
service:
portName: service
portName: http-web
## If true, create a serviceMonitor for grafana
##
@ -751,14 +757,17 @@ grafana:
# in grafana.ini
path: "/metrics"
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -789,12 +798,17 @@ kubeApiServer:
component: apiserver
provider: kubernetes
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels:
# - __meta_kubernetes_namespace
@ -839,7 +853,9 @@ kubelet:
resource: false
# From kubernetes 1.18, /metrics/resource/v1alpha1 renamed to /metrics/resource
resourcePath: "/metrics/resource/v1alpha1"
## Metric relabellings to apply to samples before ingestion
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
cAdvisorMetricRelabelings: []
# - sourceLabels: [__name__, image]
@ -853,7 +869,8 @@ kubelet:
# replacement: $1
# action: drop
## Metric relabellings to apply to samples before ingestion
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
probesMetricRelabelings: []
# - sourceLabels: [__name__, image]
@ -867,9 +884,10 @@ kubelet:
# replacement: $1
# action: drop
# relabel configs to apply to samples before ingestion.
# metrics_path is required to match upstream rules and charts
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
## metrics_path is required to match upstream rules and charts
cAdvisorRelabelings:
- sourceLabels: [__metrics_path__]
targetLabel: metrics_path
@ -880,6 +898,9 @@ kubelet:
# replacement: $1
# action: replace
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
probesRelabelings:
- sourceLabels: [__metrics_path__]
targetLabel: metrics_path
@ -890,6 +911,9 @@ kubelet:
# replacement: $1
# action: replace
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
resourceRelabelings:
- sourceLabels: [__metrics_path__]
targetLabel: metrics_path
@ -900,6 +924,9 @@ kubelet:
# replacement: $1
# action: replace
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - sourceLabels: [__name__, image]
# separator: ;
@ -912,9 +939,10 @@ kubelet:
# replacement: $1
# action: drop
# relabel configs to apply to samples before ingestion.
# metrics_path is required to match upstream rules and charts
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
## metrics_path is required to match upstream rules and charts
relabelings:
- sourceLabels: [__metrics_path__]
targetLabel: metrics_path
@ -967,14 +995,16 @@ kubeControllerManager:
# Name of the server to use when validating TLS certificate
serverName: null
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1002,14 +1032,16 @@ coreDns:
##
proxyUrl: ""
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1041,14 +1073,16 @@ kubeDns:
##
proxyUrl: ""
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1057,12 +1091,17 @@ kubeDns:
# targetLabel: nodename
# replacement: $1
# action: replace
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
dnsmasqMetricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
dnsmasqRelabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1119,14 +1158,16 @@ kubeEtcd:
certFile: ""
keyFile: ""
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1177,14 +1218,16 @@ kubeScheduler:
## Name of the server to use when validating TLS certificate
serverName: null
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1229,14 +1272,16 @@ kubeProxy:
##
https: false
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - action: keep
@ -1261,18 +1306,17 @@ kubeStateMetrics:
## Override serviceMonitor selector
##
selectorOverride: {}
## Override namespace selector
##
namespaceOverride: ""
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
# relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1320,7 +1364,8 @@ nodeExporter:
##
scrapeTimeout: ""
## Metric relabel configs to apply to samples before ingestion.
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - sourceLabels: [__name__]
@ -1329,7 +1374,8 @@ nodeExporter:
# replacement: $1
# action: drop
## relabel configs to apply to samples before ingestion.
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
@ -1350,6 +1396,8 @@ prometheus-node-exporter:
extraArgs:
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
service:
portName: http-metrics
## Manages Prometheus and Alertmanager components
##
@ -1499,6 +1547,8 @@ prometheusOperator:
kubeletService:
enabled: true
namespace: kube-system
## Use '{{ template "kube-prometheus-stack.fullname" . }}-kubelet' by default
name: ""
## Create a servicemonitor for the operator
##
@ -1589,7 +1639,7 @@ prometheusOperator:
##
image:
repository: quay.io/prometheus-operator/prometheus-operator
tag: v0.50.0
tag: v0.52.0
sha: ""
pullPolicy: IfNotPresent
@ -1601,26 +1651,29 @@ prometheusOperator:
##
# alertmanagerDefaultBaseImage: quay.io/prometheus/alertmanager
## Prometheus-config-reloader image to use for config and rule reloading
## Prometheus-config-reloader
##
prometheusConfigReloaderImage:
prometheusConfigReloader:
# image to use for config and rule reloading
image:
repository: quay.io/prometheus-operator/prometheus-config-reloader
tag: v0.50.0
tag: v0.52.0
sha: ""
## Set the prometheus config reloader side-car CPU limit
##
configReloaderCpu: 100m
## Set the prometheus config reloader side-car memory limit
##
configReloaderMemory: 50Mi
# resource config for prometheusConfigReloader
resources:
requests:
cpu: 100m
memory: 50Mi
limits:
cpu: 100m
memory: 50Mi
## Thanos side-car image when configured
##
thanosImage:
repository: quay.io/thanos/thanos
tag: v0.17.2
tag: v0.23.1
sha: ""
## Set a Field Selector to filter watched secrets
@ -2013,7 +2066,7 @@ prometheus:
##
image:
repository: quay.io/prometheus/prometheus
tag: v2.28.1
tag: v2.31.1
sha: ""
## Tolerations for use with node taints
@ -2425,6 +2478,13 @@ prometheus:
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#thanosspec
##
thanos: {}
# secretProviderClass:
# provider: gcp
# parameters:
# secrets: |
# - resourceName: "projects/$PROJECT_ID/secrets/testsecret/versions/latest"
# fileName: "objstore.yaml"
# objectStorageConfigFile: /var/secrets/object-store.yaml
## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod.
## if using proxy extraContainer update targetPort with proxy container port
@ -2436,7 +2496,7 @@ prometheus:
## PortName to use for Prometheus.
##
portName: "web"
portName: "http-web"
## ArbitraryFSAccessThroughSMs configures whether configuration based on a service monitor can access arbitrary files
## on the file system of the Prometheus container e.g. bearer token files.

View File

@ -1,5 +1,5 @@
apiVersion: v1
appVersion: 1.3.0
appVersion: 1.4.1
description: A Helm chart for prometheus pushgateway
home: https://github.com/prometheus/pushgateway
keywords:
@ -13,4 +13,4 @@ maintainers:
name: prometheus-pushgateway
sources:
- https://github.com/prometheus/pushgateway
version: 1.10.1
version: 1.13.0

View File

@ -61,11 +61,12 @@ spec:
timeoutSeconds: 10
resources:
{{ toYaml .Values.resources | indent 12 }}
{{- if .Values.persistentVolume.enabled }}
volumeMounts:
- name: storage-volume
mountPath: "{{ .Values.persistentVolume.mountPath }}"
subPath: "{{ .Values.persistentVolume.subPath }}"
{{- if .Values.extraVolumeMounts }}
{{ toYaml .Values.extraVolumeMounts | indent 12 }}
{{- end }}
{{- if .Values.nodeSelector }}
nodeSelector:
@ -79,13 +80,18 @@ spec:
affinity:
{{ toYaml .Values.affinity | indent 8 }}
{{- end }}
{{- if .Values.persistentVolume.enabled }}
{{- if .Values.securityContext }}
securityContext:
{{ toYaml .Values.securityContext | indent 8 }}
{{- end }}
volumes:
- name: storage-volume
{{- if .Values.persistentVolume.enabled }}
persistentVolumeClaim:
claimName: {{ if .Values.persistentVolume.existingClaim }}{{ .Values.persistentVolume.existingClaim }}{{- else }}{{ template "prometheus-pushgateway.fullname" . }}{{- end }}
{{- else}}
emptyDir: {}
{{- end -}}
{{- if .Values.extraVolumes }}
{{ toYaml .Values.extraVolumes | indent 8 }}
{{- end }}

View File

@ -2,9 +2,13 @@
{{- $serviceName := include "prometheus-pushgateway.fullname" . }}
{{- $servicePort := .Values.service.port -}}
{{- $ingressPath := .Values.ingress.path -}}
{{- if semverCompare ">=1.14.0-0" .Capabilities.KubeVersion.GitVersion }}
{{- $ingressClassName := .Values.ingress.className -}}
{{- $ingressPathType := .Values.ingress.pathType -}}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else }}
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
@ -18,15 +22,28 @@ metadata:
name: {{ template "prometheus-pushgateway.fullname" . }}
namespace: {{ .Release.Namespace }}
spec:
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
ingressClassName: {{ $ingressClassName }}
{{- end }}
rules:
{{- range $host := .Values.ingress.hosts }}
- host: {{ $host }}
http:
paths:
- path: {{ $ingressPath }}
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
pathType: {{ $ingressPathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $serviceName }}
port:
number: {{ $servicePort }}
{{- else }}
serviceName: {{ $serviceName }}
servicePort: {{ $servicePort }}
{{- end }}
{{- end -}}
{{- if .Values.ingress.tls }}
tls:

View File

@ -10,7 +10,7 @@ fullnameOverride: ""
image:
repository: prom/pushgateway
tag: v1.3.0
tag: v1.4.1
pullPolicy: IfNotPresent
# Optional pod imagePullSecrets
@ -84,7 +84,9 @@ ingress:
##
enabled: false
# AWS ALB requires path of /*
className: ""
path: /
pathType: ImplementationSpecific
## Annotations.
##
@ -218,6 +220,14 @@ persistentVolume:
##
subPath: ""
extraVolumes: {}
# - name: extra
# emptyDir: {}
extraVolumeMounts: {}
# - name: extra
# mountPath: /usr/share/extras
# readOnly: true
# Configuration for clusters with restrictive network policies in place:
# - allowAll allows access to the PushGateway from any namespace
# - customSelector is a list of pod/namespaceSelectors to allow access from

View File

@ -56,7 +56,11 @@
},
"gridPos": { },
"id": 3,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -128,13 +132,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -236,7 +241,11 @@
},
"gridPos": { },
"id": 5,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -307,13 +316,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -406,13 +416,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -488,13 +499,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -594,7 +606,11 @@
},
"gridPos": { },
"id": 9,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -665,13 +681,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -764,13 +781,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -846,13 +864,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -940,13 +959,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": false,
"sideWidth": null,
"total": false,
@ -968,7 +988,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@ -1021,13 +1041,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": false,
"sideWidth": null,
"total": false,
@ -1049,7 +1070,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@ -1102,6 +1123,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -1130,7 +1152,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))",
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@ -1196,13 +1218,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -1277,13 +1300,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -1305,7 +1329,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])",
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1358,13 +1382,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -1452,7 +1477,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1470,7 +1495,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(apiserver_request_total, cluster)",
"query": "label_values(up{job=\"apiserver\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1490,7 +1515,7 @@
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -1619,7 +1619,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -36,7 +36,11 @@
},
"gridPos": { },
"id": 2,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -106,6 +110,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -134,7 +139,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -200,6 +205,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -228,7 +234,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -294,6 +300,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -322,7 +329,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} {{name}}",
@ -388,13 +395,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -416,28 +424,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -490,13 +498,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -518,7 +527,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -584,6 +593,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -612,7 +622,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -678,13 +688,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -759,13 +770,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -787,7 +799,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -840,13 +852,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -934,7 +947,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -24,10 +24,12 @@
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -47,7 +49,7 @@
"steppedLine": false,
"targets": [
{
"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))",
"expr": "1 - sum(avg by (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", cluster=\"$cluster\"}[$__rate_interval])))",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -60,7 +62,7 @@
"title": "CPU Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -99,11 +101,14 @@
"fill": 1,
"format": "percentunit",
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -123,7 +128,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -136,7 +141,7 @@
"title": "CPU Requests Commitment",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -175,11 +180,14 @@
"fill": 1,
"format": "percentunit",
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -199,7 +207,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -212,7 +220,7 @@
"title": "CPU Limits Commitment",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -251,11 +259,14 @@
"fill": 1,
"format": "percentunit",
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -275,7 +286,7 @@
"steppedLine": false,
"targets": [
{
"expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{cluster=\"$cluster\"})",
"expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -288,7 +299,7 @@
"title": "Memory Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -327,11 +338,14 @@
"fill": 1,
"format": "percentunit",
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -351,7 +365,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -364,7 +378,7 @@
"title": "Memory Requests Commitment",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -403,11 +417,14 @@
"fill": 1,
"format": "percentunit",
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -427,7 +444,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -440,7 +457,7 @@
"title": "Memory Limits Commitment",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -490,11 +507,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -528,7 +548,7 @@
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -578,11 +598,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -741,7 +764,7 @@
],
"targets": [
{
"expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -810,7 +833,7 @@
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -861,11 +884,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -885,7 +911,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -899,7 +925,7 @@
"title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -949,11 +975,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1112,7 +1141,7 @@
],
"targets": [
{
"expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1130,7 +1159,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1148,7 +1177,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1166,7 +1195,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1181,7 +1210,7 @@
"title": "Requests by Namespace",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -1234,10 +1263,12 @@
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1381,7 +1412,7 @@
],
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1390,7 +1421,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1399,7 +1430,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1408,7 +1439,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1417,7 +1448,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1426,7 +1457,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1441,7 +1472,7 @@
"title": "Current Network Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -1492,11 +1523,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1516,7 +1550,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1530,7 +1564,7 @@
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1568,11 +1602,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1592,7 +1629,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1606,7 +1643,7 @@
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1656,11 +1693,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1680,7 +1720,7 @@
"steppedLine": false,
"targets": [
{
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1694,7 +1734,7 @@
"title": "Average Container Bandwidth by Namespace: Received",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1732,11 +1772,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1756,7 +1799,7 @@
"steppedLine": false,
"targets": [
{
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1770,7 +1813,7 @@
"title": "Average Container Bandwidth by Namespace: Transmitted",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1820,11 +1863,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1844,7 +1890,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1858,7 +1904,7 @@
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1896,11 +1942,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1920,7 +1969,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -1934,7 +1983,7 @@
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1984,11 +2033,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2008,7 +2060,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2022,7 +2074,7 @@
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -2060,11 +2112,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 19,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2084,7 +2139,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2098,7 +2153,7 @@
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -2149,11 +2204,14 @@
"decimals": -1,
"fill": 10,
"id": 20,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2173,7 +2231,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m])))",
"expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2187,7 +2245,7 @@
"title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -2225,11 +2283,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 21,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2249,7 +2310,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2263,7 +2324,7 @@
"title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -2313,11 +2374,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 22,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2465,7 +2529,7 @@
],
"targets": [
{
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2474,7 +2538,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2483,7 +2547,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2492,7 +2556,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2501,7 +2565,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2510,7 +2574,7 @@
"step": 10
},
{
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))",
"expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2525,7 +2589,7 @@
"title": "Current Storage IO",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -2578,7 +2642,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -22,11 +22,14 @@
"fill": 1,
"format": "percentunit",
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -46,7 +49,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -59,7 +62,7 @@
"title": "CPU Utilisation (from requests)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -98,11 +101,14 @@
"fill": 1,
"format": "percentunit",
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -122,7 +128,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -135,7 +141,7 @@
"title": "CPU Utilisation (from limits)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -174,11 +180,14 @@
"fill": 1,
"format": "percentunit",
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -198,7 +207,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -211,7 +220,7 @@
"title": "Memory Utilisation (from requests)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -250,11 +259,14 @@
"fill": 1,
"format": "percentunit",
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -274,7 +286,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -287,7 +299,7 @@
"title": "Memory Utilisation (from limits)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@ -337,11 +349,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -414,7 +429,7 @@
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -464,11 +479,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -648,7 +666,7 @@
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -699,11 +717,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -746,7 +767,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -776,7 +797,7 @@
"title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -826,11 +847,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1004,7 +1028,7 @@
],
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1022,7 +1046,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1040,7 +1064,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1049,7 +1073,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1058,7 +1082,7 @@
"step": 10
},
{
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1067,7 +1091,7 @@
"step": 10
},
{
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1082,7 +1106,7 @@
"title": "Memory Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -1135,10 +1159,12 @@
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1282,7 +1308,7 @@
],
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1291,7 +1317,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1300,7 +1326,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1309,7 +1335,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1318,7 +1344,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1327,7 +1353,7 @@
"step": 10
},
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1342,7 +1368,7 @@
"title": "Current Network Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -1393,11 +1419,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1417,7 +1446,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1431,7 +1460,7 @@
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1469,11 +1498,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1493,7 +1525,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1507,7 +1539,7 @@
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1557,11 +1589,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1581,7 +1616,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1595,7 +1630,7 @@
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1633,11 +1668,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1657,7 +1695,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1671,7 +1709,7 @@
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1721,11 +1759,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1745,7 +1786,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1759,7 +1800,7 @@
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1797,11 +1838,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1821,7 +1865,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1835,7 +1879,7 @@
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1886,11 +1930,14 @@
"decimals": -1,
"fill": 10,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1910,7 +1957,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m])))",
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1924,7 +1971,7 @@
"title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1962,11 +2009,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 17,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1986,7 +2036,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -2000,7 +2050,7 @@
"title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -2050,11 +2100,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 18,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -2202,7 +2255,7 @@
],
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2211,7 +2264,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2220,7 +2273,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2229,7 +2282,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2238,7 +2291,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2247,7 +2300,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -2262,7 +2315,7 @@
"title": "Current Storage IO",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -2315,7 +2368,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -2336,7 +2389,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -2359,7 +2412,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -21,11 +21,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -38,12 +41,32 @@
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"seriesOverrides": [
{
"alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
"hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
"stack": false
}
],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series",
@ -59,7 +82,7 @@
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -109,11 +132,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -293,7 +319,7 @@
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -344,11 +370,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -361,12 +390,32 @@
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"seriesOverrides": [
{
"alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
"hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
"stack": false
}
],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"format": "time_series",
@ -382,7 +431,7 @@
"title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -432,11 +481,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -688,7 +740,7 @@
"title": "Memory Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -741,7 +793,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -762,7 +814,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -785,7 +837,7 @@
"multi": true,
"name": "node",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)",
"query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -21,11 +21,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -72,7 +75,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
@ -80,7 +83,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
@ -94,7 +97,7 @@
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -144,11 +147,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -168,7 +174,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)",
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -191,7 +197,7 @@
"title": "CPU Throttling",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -241,11 +247,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -425,7 +434,7 @@
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -476,11 +485,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -521,7 +533,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -529,7 +541,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
@ -537,7 +549,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
@ -551,7 +563,7 @@
"title": "Memory Usage (WSS)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -601,11 +613,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -779,7 +794,7 @@
],
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -797,7 +812,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -815,7 +830,7 @@
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -824,7 +839,7 @@
"step": 10
},
{
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -833,7 +848,7 @@
"step": 10
},
{
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -842,7 +857,7 @@
"step": 10
},
{
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -857,7 +872,7 @@
"title": "Memory Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -910,10 +925,12 @@
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -933,7 +950,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -947,7 +964,7 @@
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -987,10 +1004,12 @@
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1010,7 +1029,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1024,7 +1043,7 @@
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1076,10 +1095,12 @@
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1099,7 +1120,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1113,7 +1134,7 @@
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1153,10 +1174,12 @@
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1176,7 +1199,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1190,7 +1213,7 @@
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1242,10 +1265,12 @@
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1265,7 +1290,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1279,7 +1304,7 @@
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1319,10 +1344,12 @@
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1342,7 +1369,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1356,7 +1383,7 @@
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1407,11 +1434,14 @@
"decimals": -1,
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1431,7 +1461,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))",
"expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
@ -1439,7 +1469,7 @@
"step": 10
},
{
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))",
"expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
@ -1453,7 +1483,7 @@
"title": "IOPS",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1491,11 +1521,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1515,7 +1548,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Reads",
@ -1523,7 +1556,7 @@
"step": 10
},
{
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))",
"expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Writes",
@ -1537,7 +1570,7 @@
"title": "ThroughPut",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1588,11 +1621,14 @@
"decimals": -1,
"fill": 10,
"id": 14,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1612,7 +1648,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m])))",
"expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -1626,7 +1662,7 @@
"title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1664,11 +1700,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 15,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1688,7 +1727,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -1702,7 +1741,7 @@
"title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1752,11 +1791,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 16,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1904,7 +1946,7 @@
],
"targets": [
{
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1913,7 +1955,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1922,7 +1964,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1931,7 +1973,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1940,7 +1982,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1949,7 +1991,7 @@
"step": 10
},
{
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))",
"expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1964,7 +2006,7 @@
"title": "Current Storage IO",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -2017,7 +2059,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -2038,7 +2080,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -2061,7 +2103,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -2084,7 +2126,7 @@
"multi": false,
"name": "pod",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -21,11 +21,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -59,7 +62,7 @@
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -109,11 +112,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -251,7 +257,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -260,7 +266,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -269,7 +275,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -278,7 +284,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -293,7 +299,7 @@
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -344,11 +350,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -382,7 +391,7 @@
"title": "Memory Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -432,11 +441,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -574,7 +586,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -583,7 +595,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -592,7 +604,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -601,7 +613,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -616,7 +628,7 @@
"title": "Memory Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -669,10 +681,12 @@
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -816,7 +830,7 @@
],
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -825,7 +839,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -834,7 +848,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -843,7 +857,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -852,7 +866,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -861,7 +875,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -876,7 +890,7 @@
"title": "Current Network Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -927,11 +941,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -951,7 +968,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -965,7 +982,7 @@
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1003,11 +1020,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1027,7 +1047,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1041,7 +1061,7 @@
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1091,11 +1111,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1115,7 +1138,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1129,7 +1152,7 @@
"title": "Average Container Bandwidth by Pod: Received",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1167,11 +1190,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1191,7 +1217,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1205,7 +1231,7 @@
"title": "Average Container Bandwidth by Pod: Transmitted",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1255,11 +1281,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1279,7 +1308,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1293,7 +1322,7 @@
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1331,11 +1360,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1355,7 +1387,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1369,7 +1401,7 @@
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1419,11 +1451,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1443,7 +1478,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1457,7 +1492,7 @@
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1495,11 +1530,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1519,7 +1557,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1533,7 +1571,7 @@
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1585,7 +1623,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1606,7 +1644,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1629,30 +1667,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "workload",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
"query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1675,7 +1690,30 @@
"multi": false,
"name": "type",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "workload",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -21,11 +21,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 1,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -98,7 +101,7 @@
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -148,11 +151,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -329,7 +335,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -338,7 +344,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -347,7 +353,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -356,7 +362,7 @@
"step": 10
},
{
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -371,7 +377,7 @@
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -422,11 +428,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -469,7 +478,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}} - {{workload_type}}",
@ -499,7 +508,7 @@
"title": "Memory Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -549,11 +558,14 @@
"datasource": "$datasource",
"fill": 1,
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -721,7 +733,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -730,7 +742,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -739,7 +751,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -748,7 +760,7 @@
"step": 10
},
{
"expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -757,7 +769,7 @@
"step": 10
},
{
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -772,7 +784,7 @@
"title": "Memory Quota",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -825,10 +837,12 @@
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -987,7 +1001,7 @@
],
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -996,7 +1010,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1005,7 +1019,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1014,7 +1028,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1023,7 +1037,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1032,7 +1046,7 @@
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1047,7 +1061,7 @@
"title": "Current Network Usage",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"transform": "table",
@ -1098,11 +1112,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1122,7 +1139,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1136,7 +1153,7 @@
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1174,11 +1191,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1198,7 +1218,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1212,7 +1232,7 @@
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1262,11 +1282,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1286,7 +1309,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1300,7 +1323,7 @@
"title": "Average Container Bandwidth by Workload: Received",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1338,11 +1361,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1362,7 +1388,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1376,7 +1402,7 @@
"title": "Average Container Bandwidth by Workload: Transmitted",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1426,11 +1452,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1450,7 +1479,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1464,7 +1493,7 @@
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1502,11 +1531,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1526,7 +1558,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1540,7 +1572,7 @@
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1590,11 +1622,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1614,7 +1649,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1628,7 +1663,7 @@
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1666,11 +1701,14 @@
"datasource": "$datasource",
"fill": 10,
"id": 13,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
@ -1690,7 +1728,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@ -1704,7 +1742,7 @@
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
@ -1756,7 +1794,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1777,7 +1815,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1787,34 +1825,6 @@
"type": "query",
"useTags": false
},
{
"allValue": null,
"auto": false,
"auto_count": 30,
"auto_min": "10s",
"current": {
"text": "deployment",
"value": "deployment"
},
"datasource": "$datasource",
"definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "type",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
@ -1828,7 +1838,7 @@
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1837,6 +1847,34 @@
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"auto": false,
"auto_count": 30,
"auto_min": "10s",
"current": {
"text": "deployment",
"value": "deployment"
},
"datasource": "$datasource",
"definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "type",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},

View File

@ -154,7 +154,7 @@
"refId": "A"
}
],
"title": "Running Container",
"title": "Running Containers",
"transparent": false,
"type": "stat"
},
@ -294,7 +294,7 @@
"pluginVersion": "7",
"targets": [
{
"expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m]))",
"expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -347,7 +347,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
"expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -432,7 +432,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -517,7 +517,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -602,14 +602,14 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
"expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} pod",
"refId": "A"
},
{
"expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)",
"expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} worker",
@ -694,14 +694,14 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} pod",
"refId": "A"
},
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} worker",
@ -788,7 +788,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -875,7 +875,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -962,7 +962,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
"expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
@ -1047,7 +1047,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{operation_type}}",
@ -1132,7 +1132,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{operation_type}}",
@ -1218,7 +1218,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)",
"expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1303,7 +1303,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1388,7 +1388,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1473,28 +1473,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -1579,7 +1579,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{verb}} {{url}}",
@ -1749,7 +1749,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -1893,7 +1893,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1927,11 +1927,11 @@
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": null,
"label": "Data Source",
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\"}, instance)",
"query": "label_values(up{job=\"kubelet\",cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -1137,7 +1137,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -89,7 +89,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ workload }}",
@ -184,7 +184,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ workload }}",
@ -441,7 +441,7 @@
],
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -450,7 +450,7 @@
"step": 10
},
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -459,7 +459,7 @@
"step": 10
},
{
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -468,7 +468,7 @@
"step": 10
},
{
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -477,7 +477,7 @@
"step": 10
},
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -486,7 +486,7 @@
"step": 10
},
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -495,7 +495,7 @@
"step": 10
},
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -504,7 +504,7 @@
"step": 10
},
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -578,7 +578,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ workload }}",
@ -673,7 +673,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ workload }}",
@ -794,7 +794,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@ -885,7 +885,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@ -987,7 +987,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@ -1078,7 +1078,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@ -1189,7 +1189,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@ -1280,7 +1280,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@ -1349,7 +1349,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1415,14 +1415,14 @@
"value": "deployment"
},
"datasource": "$datasource",
"definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "type",
"options": [ ],
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,

View File

@ -26,13 +26,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 2,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -124,7 +125,11 @@
},
"gridPos": { },
"id": 3,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -207,13 +212,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -305,7 +311,11 @@
},
"gridPos": { },
"id": 5,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -388,7 +398,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -406,7 +416,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)",
"query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -903,7 +903,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",

View File

@ -36,7 +36,11 @@
},
"gridPos": { },
"id": 2,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -106,13 +110,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -134,7 +139,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "rate",
@ -187,6 +192,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -215,7 +221,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -281,13 +287,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -309,7 +316,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
"expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "rate",
@ -362,6 +369,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -390,7 +398,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -456,13 +464,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -484,28 +493,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -558,13 +567,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -586,7 +596,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -652,6 +662,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -680,7 +691,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -746,13 +757,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -827,13 +839,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 11,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -855,7 +868,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[5m])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -908,13 +921,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 12,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -1002,7 +1016,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1020,7 +1034,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(up{job=\"kube-proxy\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@ -1040,7 +1054,7 @@
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\"}, instance)",
"query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -36,7 +36,11 @@
},
"gridPos": { },
"id": 2,
"interval": null,
"interval": "1m",
"legend": {
"alignAsTable": true,
"rightSide": true
},
"links": [ ],
"mappingType": 1,
"mappingTypes": [
@ -106,6 +110,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 3,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -134,28 +139,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A"
},
{
"expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B"
},
{
"expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C"
},
{
"expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} volume",
@ -208,6 +213,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 4,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -236,28 +242,28 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C"
},
{
"expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}} {{instance}} volume",
@ -323,13 +329,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 5,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -351,28 +358,28 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@ -425,13 +432,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 6,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -453,7 +461,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -519,6 +527,7 @@
"fillGradient": 0,
"gridPos": { },
"id": 7,
"interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@ -547,7 +556,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -613,13 +622,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 8,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -694,13 +704,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 9,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -722,7 +733,7 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])",
"expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@ -775,13 +786,14 @@
"fillGradient": 0,
"gridPos": { },
"id": 10,
"interval": "1m",
"legend": {
"alignAsTable": false,
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@ -869,7 +881,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -907,7 +919,7 @@
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\"}, instance)",
"query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -89,7 +89,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -184,7 +184,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -290,7 +290,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -385,7 +385,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@ -506,7 +506,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -597,7 +597,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -699,7 +699,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -790,7 +790,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -901,7 +901,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -992,7 +992,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@ -1061,7 +1061,7 @@
"value": "default"
},
"hide": 0,
"label": null,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
@ -1079,7 +1079,7 @@
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
@ -1099,14 +1099,14 @@
"value": "kube-system"
},
"datasource": "$datasource",
"definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"definition": "label_values(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"query": "label_values(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,

View File

@ -8,8 +8,8 @@
"subdir": "grafana"
}
},
"version": "c3b14b24b83cfe9abf1064649d19e2d679f033fb",
"sum": "YrE4DNQsWgYWs6h0j/FjQETt8xDXdYdsslb1WK7xQEk="
"version": "199e363523104ff8b3a12483a4e3eca86372b078",
"sum": "/jDHzVAjHB4AOLkJHw1GyATX5ogZ1iMdcJXZAgaG3+g="
},
{
"source": {
@ -18,8 +18,8 @@
"subdir": "contrib/mixin"
}
},
"version": "3df272774672366beb02c5447782805ab5fec957",
"sum": "5XhYOigrKipOWDbIn9hlrz7JcbelzvJnormxSaup9JI="
"version": "29292aa7bdafaf65cb5e054591fe0ff07b36f5ee",
"sum": "cdKL5kPYfpWSpTCu4qctmh+gWQqL+4YWom6rw9qLYJU="
},
{
"source": {
@ -28,7 +28,7 @@
"subdir": "grafonnet"
}
},
"version": "19b27b272abf4263af1365ec485784c49815a332",
"version": "3626fc4dc2326931c530861ac5bebe39444f6cbf",
"sum": "gF8foHByYcB25jcUOBqP6jxk0OPifQMjPvKY0HaCk6w="
},
{
@ -38,8 +38,8 @@
"subdir": "grafana-builder"
}
},
"version": "b7eae75972a369bf8ebfb03dcb0d4c14464ef85a",
"sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
"version": "b102f9ac7d1290ac025c2a7ac99f7fd9a9948503",
"sum": "0KkygBQd/AFzUvVzezE4qF/uDYgrwUXVpZfINBti0oc="
},
{
"source": {
@ -48,8 +48,8 @@
"subdir": ""
}
},
"version": "ff4641bcd83314c955150bea6b147df9ca335c4a",
"sum": "oUVGwcCbmdH8qz9B+lbRawI9s23GY9HeW7MwYZRbZ/0="
"version": "9821d07e94e9a9916575a234fb699ae3331fa939",
"sum": "xubNXyvDwUw9GZzi9BRb6ob3bYzfoMr5F5zCVn2d7ag="
},
{
"source": {
@ -58,7 +58,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "ff4641bcd83314c955150bea6b147df9ca335c4a",
"version": "9821d07e94e9a9916575a234fb699ae3331fa939",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
},
{
@ -68,8 +68,8 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "8dab6f7472c26987ab7f8899a4a2f753fed8e8a8",
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
"version": "e3056ae518d0234105276ec916296923968ad294",
"sum": "U1wzIpTAtOvC1yj43Y8PfvT0JfvnAcMfNH12Wi+ab0Y="
},
{
"source": {
@ -78,7 +78,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "8dab6f7472c26987ab7f8899a4a2f753fed8e8a8",
"version": "e3056ae518d0234105276ec916296923968ad294",
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
},
{
@ -88,8 +88,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "a2eee1803a074fb40cad109d690732c22f0130cf",
"sum": "kqVnoNBux2YF1s03m+O3w/5jreAnjXx2/NjvNP1Hoy4="
"version": "9ca30579f61ec51e63d87927d19b9d2a433c7e25",
"sum": "EYlmVYtdVovF3ziMZ9dhV0trzXww6YSz8A2tH2YF9Zw="
},
{
"source": {
@ -98,8 +98,8 @@
"subdir": "jsonnet/mixin"
}
},
"version": "42fc15967e35e0cca68cf935f844086edbc82d0e",
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
"version": "335ebbc2f6ecf10b699821fa8cebcbff4a718ca7",
"sum": "qZ4WgiweaE6eeKtFK60QUjLO8sf2L9Q8fgafWvDcyfY=",
"name": "prometheus-operator-mixin"
},
{
@ -109,8 +109,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "42fc15967e35e0cca68cf935f844086edbc82d0e",
"sum": "sECNXs/aIEreFUma1BWVyknBygqh3AVJEB3msmrAYYY="
"version": "335ebbc2f6ecf10b699821fa8cebcbff4a718ca7",
"sum": "Vr2IY6Uz1lYYyGDF7QaEAVkJwAtOEikCfuXJN2eAUM0="
},
{
"source": {
@ -119,7 +119,7 @@
"subdir": "doc/alertmanager-mixin"
}
},
"version": "e35efbddb66a73fd8723be5334477e76f21fbd19",
"version": "e2a10119aaf7777fa523d216e05897c5b719134c",
"sum": "pep+dHzfIjh2SU5pEkwilMCAT/NoL6YYflV4x8cr7vU=",
"name": "alertmanager"
},
@ -130,8 +130,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "0e6b23c338e98809c9872c70a2f5dfa8d6d370d4",
"sum": "MnfAA4+l2BkgJncnYfV8uHC7CxHZut8+ap8KkEqyB5Y="
"version": "7dbf35891570f9ce3bccb25a55176ea4923b35dd",
"sum": "MlWDAKGZ+JArozRKdKEvewHeWn8j2DNBzesJfLVd0dk="
},
{
"source": {
@ -140,8 +140,8 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "a05b510fc32c3ecc2fc369002576179ae1cbcc23",
"sum": "m4VHwft4fUcxzL4+52lLZG/V5aH5ZEdjaweb88vISL0=",
"version": "c965a7555b7ffcee1a127d782abd5bb478a16750",
"sum": "ZjQoYhvgKwJNkg+h+m9lW3SYjnjv5Yx5btEipLhru88=",
"name": "prometheus"
},
{
@ -151,8 +151,8 @@
"subdir": "mixin"
}
},
"version": "360b39e1c6ab3ac8dcefa225a6205142f9362c68",
"sum": "Og+wEHfgzXBvBLAeeQvGNoiCw3FY4LQHlJdpsG/owj8=",
"version": "d1acaea2a11a3e4db6bb435c98dea63c517e3530",
"sum": "1Y1cPIeoPg2nCAEhKPCt8bAGuwuOP2eZ3kVF432mlMA=",
"name": "thanos-mixin"
}
],

View File

@ -4,6 +4,7 @@
"metadata": {
"labels": {
"app.kubernetes.io/component": "alert-router",
"app.kubernetes.io/instance": "main",
"app.kubernetes.io/name": "alertmanager",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.23.0",

View File

@ -64,10 +64,10 @@
{
"alert": "etcdGRPCRequestsSlow",
"annotations": {
"description": "etcd cluster \"{{ $labels.job }}\": 99th percentile of gRPC requests is {{ $value }}s on etcd instance {{ $labels.instance }}.",
"description": "etcd cluster \"{{ $labels.job }}\": 99th percentile of gRPC requests is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method }} method.",
"summary": "etcd grpc requests are slow"
},
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\", grpc_type=\"unary\"}[5m])) without(grpc_type))\n> 0.15\n",
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\", grpc_method!=\"Defragment\", grpc_type=\"unary\"}[5m])) without(grpc_type))\n> 0.15\n",
"for": "10m",
"labels": {
"severity": "critical"

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "kube-state-metrics",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.2.1",
"app.kubernetes.io/version": "2.2.4",
"prometheus": "k8s",
"role": "alert-rules"
},

View File

@ -19,11 +19,11 @@
{
"alert": "KubePodCrashLooping",
"annotations": {
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf \"%.2f\" $value }} times / 10 minutes.",
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: \"CrashLoopBackOff\").",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping",
"summary": "Pod is crash looping."
},
"expr": "increase(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\"}[10m]) > 0\nand\nkube_pod_container_status_waiting{job=\"kube-state-metrics\"} == 1\n",
"expr": "max_over_time(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\", job=\"kube-state-metrics\"}[5m]) >= 1\n",
"for": "15m",
"labels": {
"severity": "warning"
@ -123,7 +123,7 @@
{
"alert": "KubeContainerWaiting",
"annotations": {
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting",
"summary": "Pod container waiting longer than 1 hour"
},
@ -232,7 +232,7 @@
{
"alert": "KubeMemoryOvercommit",
"annotations": {
"description": "Cluster has overcommitted memory resource requests for Pods by {{ $value }} bytes and cannot tolerate node failure.",
"description": "Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit",
"summary": "Cluster has overcommitted memory resource requests."
},
@ -249,7 +249,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit",
"summary": "Cluster has overcommitted CPU resource requests."
},
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n",
"expr": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"})\n > 1.5\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -262,7 +262,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit",
"summary": "Cluster has overcommitted memory resource requests."
},
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n",
"expr": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"})\n > 1.5\n",
"for": "5m",
"labels": {
"severity": "warning"
@ -332,7 +332,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup",
"summary": "PersistentVolume is filling up."
},
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\n",
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1m",
"labels": {
"severity": "critical"
@ -345,7 +345,7 @@
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup",
"summary": "PersistentVolume is filling up."
},
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\n",
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n",
"for": "1h",
"labels": {
"severity": "warning"
@ -468,7 +468,7 @@
{
"alert": "KubeClientCertificateExpiration",
"annotations": {
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.",
"description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration",
"summary": "Client certificate is about to expire."
},
@ -480,7 +480,7 @@
{
"alert": "KubeClientCertificateExpiration",
"annotations": {
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.",
"description": "A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration",
"summary": "Client certificate is about to expire."
},
@ -490,11 +490,11 @@
}
},
{
"alert": "AggregatedAPIErrors",
"alert": "KubeAggregatedAPIErrors",
"annotations": {
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors",
"summary": "An aggregated API has reported errors."
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors",
"summary": "Kubernetes aggregated API has reported errors."
},
"expr": "sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4\n",
"labels": {
@ -502,11 +502,11 @@
}
},
{
"alert": "AggregatedAPIDown",
"alert": "KubeAggregatedAPIDown",
"annotations": {
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown",
"summary": "An aggregated API is down."
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown",
"summary": "Kubernetes aggregated API is down."
},
"expr": "(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85\n",
"for": "5m",
@ -530,9 +530,9 @@
{
"alert": "KubeAPITerminatedRequests",
"annotations": {
"description": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests",
"summary": "The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
"summary": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
},
"expr": "sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) / ( sum(rate(apiserver_request_total{job=\"apiserver\"}[10m])) + sum(rate(apiserver_request_terminations_total{job=\"apiserver\"}[10m])) ) > 0.20\n",
"for": "5m",
@ -748,53 +748,71 @@
}
]
},
{
"name": "kubernetes-system-kube-proxy",
"rules": [
{
"alert": "KubeProxyDown",
"annotations": {
"description": "KubeProxy has disappeared from Prometheus target discovery.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown",
"summary": "Target disappeared from Prometheus target discovery."
},
"expr": "absent(up{job=\"kube-proxy\"} == 1)\n",
"for": "15m",
"labels": {
"severity": "critical"
}
}
]
},
{
"name": "kube-apiserver-burnrate.rules",
"rules": [
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate1h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate2h"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate30m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate3d"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
"labels": {
"verb": "read"
},
"record": "apiserver_request:burnrate5m"
},
{
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
"labels": {
"verb": "read"
},
@ -932,14 +950,14 @@
"record": "cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d"
},
{
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
"labels": {
"verb": "all"
},
"record": "apiserver_request:availability30d"
},
{
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
"labels": {
"verb": "read"
},
@ -1008,7 +1026,7 @@
"record": "node_namespace_pod_container:container_memory_swap"
},
{
"expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests"
},
{
@ -1016,7 +1034,7 @@
"record": "namespace_memory:kube_pod_container_resource_requests:sum"
},
{
"expr": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests"
},
{
@ -1024,7 +1042,7 @@
"record": "namespace_cpu:kube_pod_container_resource_requests:sum"
},
{
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n",
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits"
},
{
@ -1032,7 +1050,7 @@
"record": "namespace_memory:kube_pod_container_resource_limits:sum"
},
{
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n",
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n",
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits"
},
{

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "exporter",
"app.kubernetes.io/name": "node-exporter",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "1.2.2",
"app.kubernetes.io/version": "1.3.1",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -259,7 +259,7 @@
"record": "instance:node_num_cpu:sum"
},
{
"expr": "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[5m])\n)\n",
"expr": "1 - avg without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n)\n",
"record": "instance:node_cpu_utilisation:rate5m"
},
{
@ -267,7 +267,7 @@
"record": "instance:node_load1_per_cpu:ratio"
},
{
"expr": "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
"expr": "1 - (\n (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
"record": "instance:node_memory_utilisation:ratio"
},
{

View File

@ -6,7 +6,7 @@
"app.kubernetes.io/component": "controller",
"app.kubernetes.io/name": "prometheus-operator",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "0.51.1",
"app.kubernetes.io/version": "0.52.1",
"prometheus": "k8s",
"role": "alert-rules"
},
@ -110,6 +110,24 @@
}
}
]
},
{
"name": "config-reloaders",
"rules": [
{
"alert": "ConfigReloaderSidecarErrors",
"annotations": {
"description": "Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.\nAs a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.",
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors",
"summary": "config-reloader sidecar has not had a successful reload for 10m"
},
"expr": "max_over_time(reloader_last_reload_successful{namespace=~\".+\"}[5m]) == 0\n",
"for": "10m",
"labels": {
"severity": "warning"
}
}
]
}
]
}

View File

@ -4,9 +4,10 @@
"metadata": {
"labels": {
"app.kubernetes.io/component": "prometheus",
"app.kubernetes.io/instance": "k8s",
"app.kubernetes.io/name": "prometheus",
"app.kubernetes.io/part-of": "kube-prometheus",
"app.kubernetes.io/version": "2.30.0",
"app.kubernetes.io/version": "2.31.1",
"prometheus": "k8s",
"role": "alert-rules"
},

File diff suppressed because one or more lines are too long

View File

@ -49,9 +49,9 @@ spec:
severity: warning
- alert: etcdGRPCRequestsSlow
annotations:
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}} for {{`{{`}} $labels.grpc_method {{`}}`}} method.'
summary: etcd grpc requests are slow
expr: 'histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) without(grpc_type))
expr: 'histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type))
> 0.15

View File

@ -11,14 +11,10 @@ spec:
rules:
- alert: KubePodCrashLooping
annotations:
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 10 minutes.
description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
summary: Pod is crash looping.
expr: 'increase(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[10m]) > 0
and
kube_pod_container_status_waiting{job="kube-state-metrics"} == 1
expr: 'max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics"}[5m]) >= 1
'
for: 15m
@ -89,7 +85,7 @@ spec:
severity: warning
- alert: KubeContainerWaiting
annotations:
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
description: pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
expr: 'sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
@ -177,7 +173,7 @@ spec:
severity: warning
- alert: KubeMemoryOvercommit
annotations:
description: Cluster has overcommitted memory resource requests for Pods by {{`{{`}} $value {{`}}`}} bytes and cannot tolerate node failure.
description: Cluster has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
summary: Cluster has overcommitted memory resource requests.
expr: 'sum(namespace_memory:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
@ -195,7 +191,7 @@ spec:
description: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests.
expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable{resource=\"cpu\"})\n > 1.5\n"
expr: "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"})\n > 1.5\n"
for: 5m
labels:
severity: warning
@ -204,7 +200,7 @@ spec:
description: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests.
expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable{resource=\"memory\",job=\"kube-state-metrics\"})\n > 1.5\n"
expr: "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"}))\n /\nsum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"})\n > 1.5\n"
for: 5m
labels:
severity: warning
@ -251,7 +247,7 @@ spec:
description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\n"
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.03\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1m
labels:
severity: critical
@ -260,7 +256,7 @@ spec:
description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\n"
expr: "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}\n) < 0.15\nand\nkubelet_volume_stats_used_bytes{job=\"kubelet\", metrics_path=\"/metrics\"} > 0\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) < 0\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1\nunless on(namespace, persistentvolumeclaim)\nkube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1\n"
for: 1h
labels:
severity: warning
@ -371,7 +367,7 @@ spec:
rules:
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
@ -381,7 +377,7 @@ spec:
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: 'apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
@ -389,21 +385,21 @@ spec:
'
labels:
severity: critical
- alert: AggregatedAPIErrors
- alert: KubeAggregatedAPIErrors
annotations:
description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors
summary: An aggregated API has reported errors.
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors
summary: Kubernetes aggregated API has reported errors.
expr: 'sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4
'
labels:
severity: warning
- alert: AggregatedAPIDown
- alert: KubeAggregatedAPIDown
annotations:
description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown
summary: An aggregated API is down.
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown
summary: Kubernetes aggregated API is down.
expr: '(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
'
@ -423,9 +419,9 @@ spec:
severity: critical
- alert: KubeAPITerminatedRequests
annotations:
description: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
summary: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
expr: 'sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
'
@ -597,33 +593,46 @@ spec:
for: 15m
labels:
severity: critical
- name: kubernetes-system-kube-proxy
rules:
- alert: KubeProxyDown
annotations:
description: KubeProxy has disappeared from Prometheus target discovery.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown
summary: Target disappeared from Prometheus target discovery.
expr: 'absent(up{job="kube-proxy"} == 1)
'
for: 15m
labels:
severity: critical
- name: kube-apiserver-burnrate.rules
rules:
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
labels:
verb: read
record: apiserver_request:burnrate1d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
labels:
verb: read
record: apiserver_request:burnrate1h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
labels:
verb: read
record: apiserver_request:burnrate2h
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
labels:
verb: read
record: apiserver_request:burnrate30m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
labels:
verb: read
record: apiserver_request:burnrate3d
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
labels:
verb: read
record: apiserver_request:burnrate5m
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
labels:
verb: read
record: apiserver_request:burnrate6h
@ -724,12 +733,12 @@ spec:
'
record: cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"\
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"\
})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
labels:
verb: all
record: apiserver_request:availability30d
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"40\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
labels:
verb: read
record: apiserver_request:availability30d
@ -777,19 +786,19 @@ spec:
record: node_namespace_pod_container:container_memory_cache
- expr: "container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n"
record: node_namespace_pod_container:container_memory_swap
- expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_memory:kube_pod_container_resource_requests:sum
- expr: "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_requests:sum
- expr: "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
- expr: "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n)\n"
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_memory:kube_pod_container_resource_limits:sum
- expr: "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n"
- expr: "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\ngroup_left() max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)\n )\n"
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
- expr: "sum by (namespace, cluster) (\n sum by (namespace, pod, cluster) (\n max by (namespace, pod, container, cluster) (\n kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"}\n ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n"
record: namespace_cpu:kube_pod_container_resource_limits:sum

View File

@ -190,11 +190,11 @@ spec:
rules:
- expr: "count without (cpu, mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}\n)\n"
record: instance:node_num_cpu:sum
- expr: "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[5m])\n)\n"
- expr: "1 - avg without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n)\n"
record: instance:node_cpu_utilisation:rate5m
- expr: "(\n node_load1{job=\"node-exporter\"}\n/\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n"
record: instance:node_load1_per_cpu:ratio
- expr: "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n"
- expr: "1 - (\n (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n )\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n"
record: instance:node_memory_utilisation:ratio
- expr: 'rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])

View File

@ -86,4 +86,19 @@ spec:
for: 5m
labels:
severity: warning
- name: config-reloaders
rules:
- alert: ConfigReloaderSidecarErrors
annotations:
description: 'Errors encountered while the {{`{{`}}$labels.pod{{`}}`}} config-reloader sidecar attempts to sync config in {{`{{`}}$labels.namespace{{`}}`}} namespace.
As a result, configuration for service running in {{`{{`}}$labels.pod{{`}}`}} may be stale and cannot be updated anymore.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors
summary: config-reloader sidecar has not had a successful reload for 10m
expr: 'max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
'
for: 10m
labels:
severity: warning

View File

@ -9,6 +9,7 @@ helm pull prometheus-community/kube-prometheus-stack --untar --untardir charts -
rm -rf charts/prometheus-pushgateway
helm pull prometheus-community/prometheus-pushgateway --untar --untardir charts --version $PG_VER
# workaround for https://github.com/prometheus-community/helm-charts/issues/1500
patch -p0 -i zdt.patch --no-backup-if-mismatch
patch -p0 -i zdt-pushgateway.patch --no-backup-if-mismatch

Some files were not shown because too many files have changed in this diff Show More