Compare commits
17 Commits
renovate/k
...
main
Author | SHA1 | Date | |
---|---|---|---|
7802bdda76 | |||
05b7d5e2a5 | |||
59e0ac1685 | |||
8152310f6e | |||
c6cccac5c4 | |||
67c2faa62b | |||
4d469423de | |||
d838c51f3e | |||
eae32d70a5 | |||
190e21ea6f | |||
c53b56a14a | |||
a4f48cd3db | |||
a160ee1d75 | |||
db37706330 | |||
3343d51d09 | |||
99996b606f | |||
9f25776e10 |
@ -5,8 +5,8 @@ FROM docker.io/alpine:${ALPINE_VERSION}
|
||||
ARG ALPINE_VERSION
|
||||
ARG KUBE_VERSION=1.31
|
||||
|
||||
ARG SOPS_VERSION="3.9.1"
|
||||
ARG VALS_VERSION="0.37.6"
|
||||
ARG SOPS_VERSION="3.9.4"
|
||||
ARG VALS_VERSION="0.39.1"
|
||||
ARG HELM_SECRETS_VERSION="4.6.2"
|
||||
|
||||
RUN cd /etc/apk/keys && \
|
||||
|
@ -49,7 +49,6 @@ function cert-manager-post() {
|
||||
wait_for "kubectl get deployment -n $namespace cert-manager-webhook"
|
||||
kubectl rollout status deployment -n $namespace cert-manager-webhook
|
||||
wait_for 'kubectl get validatingwebhookconfigurations -o yaml | grep "caBundle: LS0"'
|
||||
apply
|
||||
fi
|
||||
|
||||
wait_for "kubectl get ClusterIssuer -n $namespace kubezero-local-ca-issuer"
|
||||
@ -82,11 +81,11 @@ function metrics-pre() {
|
||||
get_kubezero_values $ARGOCD
|
||||
|
||||
# Always use embedded kubezero chart
|
||||
helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --kube-version $KUBE_VERSION --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
|
||||
helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --kube-version $KUBE_VERSION --name-template kubezero --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
|
||||
|
||||
# Root KubeZero apply directly and exit
|
||||
if [ ${ARTIFACTS[0]} == "kubezero" ]; then
|
||||
kubectl apply -f $WORKDIR/kubezero/templates
|
||||
kubectl replace -f $WORKDIR/kubezero/templates
|
||||
exit $?
|
||||
|
||||
# "catch all" apply all enabled modules
|
||||
|
@ -121,18 +121,16 @@ control_plane_upgrade() {
|
||||
get_kubezero_values $ARGOCD
|
||||
|
||||
# tumble new config through migrate.py
|
||||
migrate_argo_values.py < "$WORKDIR"/kubezero-values.yaml > "$WORKDIR"/new-kubezero-values.yaml
|
||||
migrate_argo_values.py < "$WORKDIR"/kubezero-values.yaml > "$WORKDIR"/new-kubezero-values.yaml \
|
||||
&& mv "$WORKDIR"/new-kubezero-values.yaml "$WORKDIR"/kubezero-values.yaml
|
||||
|
||||
# Update kubezero-values CM
|
||||
kubectl get cm -n kubezero kubezero-values -o=yaml | \
|
||||
yq e '.data."values.yaml" |= load_str("/tmp/kubezero/new-kubezero-values.yaml")' | \
|
||||
kubectl apply --server-side --force-conflicts -f -
|
||||
update_kubezero_cm
|
||||
|
||||
if [ "$ARGOCD" == "True" ]; then
|
||||
# update argo app
|
||||
export kubezero_chart_version=$(yq .version $CHARTS/kubezero/Chart.yaml)
|
||||
kubectl get application kubezero -n argocd -o yaml | \
|
||||
yq '.spec.source.helm.valuesObject |= load("/tmp/kubezero/new-kubezero-values.yaml") | .spec.source.targetRevision = strenv(kubezero_chart_version)' \
|
||||
yq ".spec.source.helm.valuesObject |= load(\"$WORKDIR/kubezero-values.yaml\") | .spec.source.targetRevision = strenv(kubezero_chart_version)" \
|
||||
> $WORKDIR/new-argocd-app.yaml
|
||||
kubectl apply --server-side --force-conflicts -f $WORKDIR/new-argocd-app.yaml
|
||||
|
||||
|
@ -46,13 +46,17 @@ function get_kubezero_values() {
|
||||
}
|
||||
|
||||
|
||||
# Update kubezero-values CM
|
||||
# Overwrite kubezero-values CM with file
|
||||
function update_kubezero_cm() {
|
||||
kubectl get application kubezero -n argocd -o yaml | yq .spec.source.helm.valuesObject > ${WORKDIR}/kubezero-values.yaml
|
||||
|
||||
kubectl get cm -n kubezero kubezero-values -o=yaml | \
|
||||
yq e '.data."values.yaml" |= load_str("/tmp/kubezero/kubezero-values.yaml")' | \
|
||||
kubectl apply --server-side --force-conflicts -f -
|
||||
yq e ".data.\"values.yaml\" |= load_str(\"$WORKDIR/kubezero-values.yaml\")" | \
|
||||
kubectl replace -f -
|
||||
}
|
||||
|
||||
# sync kubezero-values CM from ArgoCD app
|
||||
function sync_kubezero_cm_from_argo() {
|
||||
get_kubezero_values True
|
||||
update_kubezero_cm
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
apiVersion: v2
|
||||
description: KubeZero Argo - Events, Workflow, CD
|
||||
name: kubezero-argo
|
||||
version: 0.2.7
|
||||
version: 0.2.8
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
@ -18,11 +18,11 @@ dependencies:
|
||||
version: ">= 0.1.6"
|
||||
repository: https://cdn.zero-downtime.net/charts/
|
||||
- name: argo-events
|
||||
version: 2.4.9
|
||||
version: 2.4.13
|
||||
repository: https://argoproj.github.io/argo-helm
|
||||
condition: argo-events.enabled
|
||||
- name: argo-cd
|
||||
version: 7.7.7
|
||||
version: 7.8.2
|
||||
repository: https://argoproj.github.io/argo-helm
|
||||
condition: argo-cd.enabled
|
||||
- name: argocd-apps
|
||||
@ -30,7 +30,7 @@ dependencies:
|
||||
repository: https://argoproj.github.io/argo-helm
|
||||
condition: argo-cd.enabled
|
||||
- name: argocd-image-updater
|
||||
version: 0.11.2
|
||||
version: 0.12.0
|
||||
repository: https://argoproj.github.io/argo-helm
|
||||
condition: argocd-image-updater.enabled
|
||||
kubeVersion: ">= 1.26.0-0"
|
||||
|
@ -1,6 +1,6 @@
|
||||
# kubezero-argo
|
||||
|
||||
![Version: 0.2.7](https://img.shields.io/badge/Version-0.2.7-informational?style=flat-square)
|
||||
![Version: 0.2.8](https://img.shields.io/badge/Version-0.2.8-informational?style=flat-square)
|
||||
|
||||
KubeZero Argo - Events, Workflow, CD
|
||||
|
||||
@ -18,16 +18,17 @@ Kubernetes: `>= 1.26.0-0`
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| https://argoproj.github.io/argo-helm | argo-cd | 7.7.7 |
|
||||
| https://argoproj.github.io/argo-helm | argo-events | 2.4.9 |
|
||||
| https://argoproj.github.io/argo-helm | argo-cd | 7.8.2 |
|
||||
| https://argoproj.github.io/argo-helm | argo-events | 2.4.13 |
|
||||
| https://argoproj.github.io/argo-helm | argocd-apps | 2.0.2 |
|
||||
| https://argoproj.github.io/argo-helm | argocd-image-updater | 0.11.2 |
|
||||
| https://argoproj.github.io/argo-helm | argocd-image-updater | 0.12.0 |
|
||||
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
|
||||
|
||||
## Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| argo-cd.configs.cm."application.instanceLabelKey" | string | `nil` | |
|
||||
| argo-cd.configs.cm."application.resourceTrackingMethod" | string | `"annotation"` | |
|
||||
| argo-cd.configs.cm."resource.customizations" | string | `"cert-manager.io/Certificate:\n # Lua script for customizing the health status assessment\n health.lua: |\n hs = {}\n if obj.status ~= nil then\n if obj.status.conditions ~= nil then\n for i, condition in ipairs(obj.status.conditions) do\n if condition.type == \"Ready\" and condition.status == \"False\" then\n hs.status = \"Degraded\"\n hs.message = condition.message\n return hs\n end\n if condition.type == \"Ready\" and condition.status == \"True\" then\n hs.status = \"Healthy\"\n hs.message = condition.message\n return hs\n end\n end\n end\n end\n hs.status = \"Progressing\"\n hs.message = \"Waiting for certificate\"\n return hs\n"` | |
|
||||
| argo-cd.configs.cm."timeout.reconciliation" | string | `"300s"` | |
|
||||
@ -35,6 +36,7 @@ Kubernetes: `>= 1.26.0-0`
|
||||
| argo-cd.configs.cm."ui.bannerpermanent" | string | `"true"` | |
|
||||
| argo-cd.configs.cm."ui.bannerposition" | string | `"bottom"` | |
|
||||
| argo-cd.configs.cm."ui.bannerurl" | string | `"https://kubezero.com/releases/v1.31"` | |
|
||||
| argo-cd.configs.cm.installationID | string | `"KubeZero-ArgoCD"` | |
|
||||
| argo-cd.configs.cm.url | string | `"https://argocd.example.com"` | |
|
||||
| argo-cd.configs.params."controller.diff.server.side" | string | `"true"` | |
|
||||
| argo-cd.configs.params."controller.operation.processors" | string | `"5"` | |
|
||||
@ -94,7 +96,7 @@ Kubernetes: `>= 1.26.0-0`
|
||||
| argo-events.configs.jetstream.streamConfig.maxMsgs | int | `1000000` | Maximum number of messages before expiring oldest message |
|
||||
| argo-events.configs.jetstream.streamConfig.replicas | int | `1` | Number of replicas, defaults to 3 and requires minimal 3 |
|
||||
| argo-events.configs.jetstream.versions[0].configReloaderImage | string | `"natsio/nats-server-config-reloader:0.14.1"` | |
|
||||
| argo-events.configs.jetstream.versions[0].metricsExporterImage | string | `"natsio/prometheus-nats-exporter:0.14.0"` | |
|
||||
| argo-events.configs.jetstream.versions[0].metricsExporterImage | string | `"natsio/prometheus-nats-exporter:0.16.0"` | |
|
||||
| argo-events.configs.jetstream.versions[0].natsImage | string | `"nats:2.10.11-scratch"` | |
|
||||
| argo-events.configs.jetstream.versions[0].startCommand | string | `"/nats-server"` | |
|
||||
| argo-events.configs.jetstream.versions[0].version | string | `"2.10.11"` | |
|
||||
|
@ -45,7 +45,7 @@ argo-cd:
|
||||
format: json
|
||||
image:
|
||||
repository: public.ecr.aws/zero-downtime/zdt-argocd
|
||||
tag: v2.13.1
|
||||
tag: v2.14.2
|
||||
networkPolicy:
|
||||
create: true
|
||||
|
||||
|
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubezero-cert-manager
|
||||
description: KubeZero Umbrella Chart for cert-manager
|
||||
type: application
|
||||
version: 0.9.11
|
||||
version: 0.9.12
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
@ -16,6 +16,6 @@ dependencies:
|
||||
version: ">= 0.1.6"
|
||||
repository: https://cdn.zero-downtime.net/charts/
|
||||
- name: cert-manager
|
||||
version: v1.17.0
|
||||
version: v1.17.1
|
||||
repository: https://charts.jetstack.io
|
||||
kubeVersion: ">= 1.30.0-0"
|
||||
|
@ -1,6 +1,6 @@
|
||||
# kubezero-cert-manager
|
||||
|
||||
![Version: 0.9.11](https://img.shields.io/badge/Version-0.9.11-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
![Version: 0.9.12](https://img.shields.io/badge/Version-0.9.12-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
|
||||
KubeZero Umbrella Chart for cert-manager
|
||||
|
||||
@ -14,12 +14,12 @@ KubeZero Umbrella Chart for cert-manager
|
||||
|
||||
## Requirements
|
||||
|
||||
Kubernetes: `>= 1.26.0-0`
|
||||
Kubernetes: `>= 1.30.0-0`
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
|
||||
| https://charts.jetstack.io | cert-manager | v1.17.0 |
|
||||
| https://charts.jetstack.io | cert-manager | v1.17.1 |
|
||||
|
||||
## AWS - OIDC IAM roles
|
||||
|
||||
|
@ -18,7 +18,7 @@
|
||||
"subdir": "contrib/mixin"
|
||||
}
|
||||
},
|
||||
"version": "c0e7e8c873a6067f9ae9076c3c243a20fa713a58",
|
||||
"version": "8c52b414f324d6369b77096af98d8f0416fe20cb",
|
||||
"sum": "XmXkOCriQIZmXwlIIFhqlJMa0e6qGWdxZD+ZDYaN0Po="
|
||||
},
|
||||
{
|
||||
@ -78,7 +78,7 @@
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "d6c38bb26f576b128cadca4137d73a037afdd872",
|
||||
"version": "393630ca7ba9b25258c098f1fd4c81962e3ca046",
|
||||
"sum": "yxqWcq/N3E/a/XreeU6EuE6X7kYPnG0AspAQFKOjASo="
|
||||
},
|
||||
{
|
||||
@ -118,8 +118,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "e27267571be06c2bdc3d2fd8dbd70161cd709cb4",
|
||||
"sum": "je1RPCp2aFNefYs5Q57Q5wDm93p8pL4pdBtA5rC7jLA="
|
||||
"version": "4ff562d5e8145940cf355f62cf2308895c4dca81",
|
||||
"sum": "kiL19fTbXOtNglsmT62kOzIf/Xpu+YwoiMPAApDXhkE="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -158,7 +158,7 @@
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "4da36fdd2377362c285aee3a96f7b0516f6e41bf",
|
||||
"version": "7deab71d6d5921eeaf8c79e3ae8e31efe63783a9",
|
||||
"sum": "gi+knjdxs2T715iIQIntrimbHRgHnpM8IFBJDD1gYfs=",
|
||||
"name": "prometheus-operator-mixin"
|
||||
},
|
||||
@ -169,8 +169,8 @@
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "4da36fdd2377362c285aee3a96f7b0516f6e41bf",
|
||||
"sum": "tb5PzIT75Hv4m3kbOHXvmrlcplg+EbS4++NfTttDNOk="
|
||||
"version": "7deab71d6d5921eeaf8c79e3ae8e31efe63783a9",
|
||||
"sum": "LctDdofQostvviE5y8vpRKWGGO1ZKO3dgJe7P9xifW0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -200,7 +200,7 @@
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "906f6a33b60cec2596018ac8cc97ac41b16b06b7",
|
||||
"version": "a5ffa83be83be22e2ec9fd1d4765299d8d16119e",
|
||||
"sum": "2c+wttfee9TwuQJZIkNV7Tekem74Qgc7iZ842P28rNw=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
|
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubezero-istio
|
||||
description: KubeZero Umbrella Chart for Istio
|
||||
type: application
|
||||
version: 0.24.3
|
||||
version: 0.24.2
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
@ -16,13 +16,13 @@ dependencies:
|
||||
version: ">= 0.1.6"
|
||||
repository: https://cdn.zero-downtime.net/charts/
|
||||
- name: base
|
||||
version: 1.24.3
|
||||
version: 1.24.2
|
||||
repository: https://istio-release.storage.googleapis.com/charts
|
||||
- name: istiod
|
||||
version: 1.24.3
|
||||
version: 1.24.2
|
||||
repository: https://istio-release.storage.googleapis.com/charts
|
||||
- name: kiali-server
|
||||
version: "1.89.7"
|
||||
version: "2.5.0"
|
||||
repository: https://kiali.org/helm-charts
|
||||
condition: kiali-server.enabled
|
||||
kubeVersion: ">= 1.30.0-0"
|
||||
|
@ -23,7 +23,7 @@ Kubernetes: `>= 1.30.0-0`
|
||||
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
|
||||
| https://istio-release.storage.googleapis.com/charts | base | 1.24.2 |
|
||||
| https://istio-release.storage.googleapis.com/charts | istiod | 1.24.2 |
|
||||
| https://kiali.org/helm-charts | kiali-server | 1.89.7 |
|
||||
| https://kiali.org/helm-charts | kiali-server | 2.5.0 |
|
||||
|
||||
## Values
|
||||
|
||||
|
@ -5,22 +5,22 @@ folder: Istio
|
||||
condition: '.Values.istiod.telemetry.enabled'
|
||||
dashboards:
|
||||
- name: istio-control-plane
|
||||
url: https://grafana.com/api/dashboards/7645/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/7645/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
- name: istio-mesh
|
||||
url: https://grafana.com/api/dashboards/7639/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/7639/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
- name: istio-service
|
||||
url: https://grafana.com/api/dashboards/7636/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/7636/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
- name: istio-workload
|
||||
url: https://grafana.com/api/dashboards/7630/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/7630/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
- name: istio-performance
|
||||
url: https://grafana.com/api/dashboards/11829/revisions/229/download
|
||||
url: https://grafana.com/api/dashboards/11829/revisions/239/download
|
||||
tags:
|
||||
- Istio
|
||||
|
File diff suppressed because one or more lines are too long
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubezero-metrics
|
||||
description: KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
|
||||
type: application
|
||||
version: 0.10.3
|
||||
version: 0.11.0
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
@ -19,14 +19,14 @@ dependencies:
|
||||
version: ">= 0.1.6"
|
||||
repository: https://cdn.zero-downtime.net/charts/
|
||||
- name: kube-prometheus-stack
|
||||
version: 66.7.1
|
||||
version: 69.2.3
|
||||
repository: https://prometheus-community.github.io/helm-charts
|
||||
- name: prometheus-adapter
|
||||
version: 4.11.0
|
||||
repository: https://prometheus-community.github.io/helm-charts
|
||||
condition: prometheus-adapter.enabled
|
||||
- name: prometheus-pushgateway
|
||||
version: 2.17.0
|
||||
version: 3.0.0
|
||||
repository: https://prometheus-community.github.io/helm-charts
|
||||
condition: prometheus-pushgateway.enabled
|
||||
kubeVersion: ">= 1.26.0"
|
||||
kubeVersion: ">= 1.30.0-0"
|
||||
|
@ -1,6 +1,6 @@
|
||||
# kubezero-metrics
|
||||
|
||||
![Version: 0.10.2](https://img.shields.io/badge/Version-0.10.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
![Version: 0.10.4](https://img.shields.io/badge/Version-0.10.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
|
||||
KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all Kubernetes integrations.
|
||||
|
||||
@ -14,14 +14,14 @@ KubeZero Umbrella Chart for Prometheus, Grafana and Alertmanager as well as all
|
||||
|
||||
## Requirements
|
||||
|
||||
Kubernetes: `>= 1.26.0`
|
||||
Kubernetes: `>= 1.30.0-0`
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
|
||||
| https://prometheus-community.github.io/helm-charts | kube-prometheus-stack | 66.1.1 |
|
||||
| https://prometheus-community.github.io/helm-charts | kube-prometheus-stack | 69.2.3 |
|
||||
| https://prometheus-community.github.io/helm-charts | prometheus-adapter | 4.11.0 |
|
||||
| https://prometheus-community.github.io/helm-charts | prometheus-pushgateway | 2.15.0 |
|
||||
| https://prometheus-community.github.io/helm-charts | prometheus-pushgateway | 3.0.0 |
|
||||
|
||||
## Values
|
||||
|
||||
|
@ -30,7 +30,7 @@
|
||||
"options": {
|
||||
"content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only."
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"title": "Notice",
|
||||
"type": "text"
|
||||
},
|
||||
@ -54,7 +54,7 @@
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -100,7 +100,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -134,7 +134,7 @@
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -232,7 +232,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -276,7 +276,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -319,7 +319,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -353,7 +353,7 @@
|
||||
},
|
||||
"id": 8,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -451,7 +451,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -495,7 +495,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -538,7 +538,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -581,7 +581,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -624,7 +624,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -670,7 +670,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -712,7 +712,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -755,7 +755,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -797,7 +797,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -46,7 +46,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -92,7 +92,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -163,7 +163,7 @@
|
||||
"y": 9
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -328,7 +328,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -374,7 +374,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -420,7 +420,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -466,7 +466,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -512,7 +512,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -558,7 +558,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -604,7 +604,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -650,7 +650,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -696,7 +696,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -742,7 +742,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -86,7 +86,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -137,7 +137,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -188,7 +188,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -239,7 +239,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -314,15 +314,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Post Request Latency 99th Quantile",
|
||||
@ -365,15 +365,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Get Request Latency 99th Quantile",
|
||||
@ -416,7 +416,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -467,7 +467,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -518,7 +518,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -70,7 +70,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -105,7 +105,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -140,7 +140,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -175,7 +175,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -210,7 +210,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -260,7 +260,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -319,7 +319,7 @@
|
||||
"y": 12
|
||||
},
|
||||
"id": 8,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -476,7 +476,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -571,7 +571,7 @@
|
||||
"y": 24
|
||||
},
|
||||
"id": 10,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -748,7 +748,7 @@
|
||||
"y": 30
|
||||
},
|
||||
"id": 11,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -892,7 +892,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -943,7 +943,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -994,7 +994,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1045,7 +1045,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1096,7 +1096,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1147,7 +1147,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1198,7 +1198,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1249,7 +1249,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1300,7 +1300,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1351,7 +1351,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1422,7 +1422,7 @@
|
||||
"y": 96
|
||||
},
|
||||
"id": 22,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -70,7 +70,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -105,7 +105,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -140,7 +140,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -242,7 +242,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -317,7 +317,7 @@
|
||||
"y": 14
|
||||
},
|
||||
"id": 6,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -500,7 +500,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -578,7 +578,7 @@
|
||||
"y": 28
|
||||
},
|
||||
"id": 8,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -768,7 +768,7 @@
|
||||
"y": 35
|
||||
},
|
||||
"id": 9,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -912,7 +912,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -963,7 +963,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1014,7 +1014,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1065,7 +1065,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1116,7 +1116,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1167,7 +1167,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1218,7 +1218,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1269,7 +1269,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1340,7 +1340,7 @@
|
||||
"y": 70
|
||||
},
|
||||
"id": 18,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -94,7 +94,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -161,7 +161,7 @@
|
||||
"y": 6
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -323,7 +323,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -342,6 +342,109 @@
|
||||
"legendFormat": "{{pod}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage (w/cache)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"mode": "normal"
|
||||
}
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "max capacity"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.stacking",
|
||||
"value": {
|
||||
"mode": "none"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": false,
|
||||
"tooltip": true,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.lineStyle",
|
||||
"value": {
|
||||
"dash": [
|
||||
10,
|
||||
10
|
||||
],
|
||||
"fill": "dash"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})",
|
||||
"legendFormat": "max capacity"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
|
||||
"legendFormat": "{{pod}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage (w/o cache)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
@ -390,10 +493,10 @@
|
||||
"h": 6,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
"y": 24
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"id": 5,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -102,7 +102,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -208,7 +208,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -250,7 +250,7 @@
|
||||
"y": 14
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -433,7 +433,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -494,7 +494,7 @@
|
||||
"y": 28
|
||||
},
|
||||
"id": 5,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -664,7 +664,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -715,7 +715,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -766,7 +766,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -817,7 +817,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -868,7 +868,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -919,7 +919,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -970,7 +970,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1029,7 +1029,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1088,7 +1088,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1139,7 +1139,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1193,7 +1193,7 @@
|
||||
"y": 70
|
||||
},
|
||||
"id": 16,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -0,0 +1,675 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "1 - avg(rate(windows_cpu_time_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", mode=\"idle\"}[$__rate_interval]))",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Utilisation",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 4,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Requests Commitment",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Limits Commitment",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Utilisation",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Requests Commitment",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 0
|
||||
},
|
||||
"id": 6,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Limits Commitment",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 7,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Namespace"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 8,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "namespace",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"namespace": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "CPU Usage",
|
||||
"Value #B": "CPU Requests",
|
||||
"Value #C": "CPU Requests %",
|
||||
"Value #D": "CPU Limits",
|
||||
"Value #E": "CPU Limits %",
|
||||
"namespace": "Namespace"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "decbytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 9,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage (Private Working Set)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Memory Usage"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "decbytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Memory Requests"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "decbytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Memory Limits"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "decbytes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Namespace"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 10,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Requests by Namespace",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "namespace",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"namespace": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "Memory Usage",
|
||||
"Value #B": "Memory Requests",
|
||||
"Value #C": "Memory Requests %",
|
||||
"Value #D": "Memory Limits",
|
||||
"Value #E": "Memory Limits %",
|
||||
"namespace": "Namespace"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Compute Resources / Cluster(Windows)",
|
||||
"uid": "4d08557fd9391b100730f2494bccac68"
|
||||
}
|
@ -0,0 +1,442 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Pod"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "pod",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"pod": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "CPU Usage",
|
||||
"Value #B": "CPU Requests",
|
||||
"Value #C": "CPU Requests %",
|
||||
"Value #D": "CPU Limits",
|
||||
"Value #E": "CPU Limits %",
|
||||
"pod": "Pod"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "decbytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage (Private Working Set)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Pod"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "pod",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"pod": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "Memory Usage",
|
||||
"Value #B": "Memory Requests",
|
||||
"Value #C": "Memory Requests %",
|
||||
"Value #D": "Memory Limits",
|
||||
"Value #E": "Memory Limits %",
|
||||
"pod": "Pod"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "namespace",
|
||||
"name": "namespace",
|
||||
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Compute Resources / Namespace(Windows)",
|
||||
"uid": "490b402361724ab1d4c45666c1fa9b6f"
|
||||
}
|
@ -0,0 +1,497 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Namespace"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Drill down to pods",
|
||||
"url": "/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "CPU Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "container",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"container": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "CPU Usage",
|
||||
"Value #B": "CPU Requests",
|
||||
"Value #C": "CPU Requests %",
|
||||
"Value #D": "CPU Limits",
|
||||
"Value #E": "CPU Limits %",
|
||||
"container": "Container"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "decbytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/%/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Memory Quota",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "container",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"Time 3": true,
|
||||
"Time 4": true,
|
||||
"Time 5": true
|
||||
},
|
||||
"indexByName": {
|
||||
"Time 1": 0,
|
||||
"Time 2": 1,
|
||||
"Time 3": 2,
|
||||
"Time 4": 3,
|
||||
"Time 5": 4,
|
||||
"Value #A": 6,
|
||||
"Value #B": 7,
|
||||
"Value #C": 8,
|
||||
"Value #D": 9,
|
||||
"Value #E": 10,
|
||||
"container": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "Memory Usage",
|
||||
"Value #B": "Memory Requests",
|
||||
"Value #C": "Memory Requests %",
|
||||
"Value #D": "Memory Limits",
|
||||
"Value #E": "Memory Limits %",
|
||||
"container": "Container"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "bytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
|
||||
"legendFormat": "Received : {{ container }}"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
|
||||
"legendFormat": "Transmitted : {{ container }}"
|
||||
}
|
||||
],
|
||||
"title": "Network I/O",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "namespace",
|
||||
"name": "namespace",
|
||||
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "pod",
|
||||
"name": "pod",
|
||||
"query": "label_values(windows_pod_container_available{cluster=\"$cluster\",namespace=\"$namespace\"}, pod)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Compute Resources / Pod(Windows)",
|
||||
"uid": "40597a704a610e936dc6ed374a7ce023"
|
||||
}
|
@ -50,7 +50,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -109,7 +109,7 @@
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -240,7 +240,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -302,7 +302,7 @@
|
||||
"y": 21
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -453,7 +453,7 @@
|
||||
"y": 28
|
||||
},
|
||||
"id": 5,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -597,7 +597,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -648,7 +648,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -699,7 +699,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -750,7 +750,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -801,7 +801,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -852,7 +852,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -903,7 +903,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -954,7 +954,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -102,7 +102,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -189,7 +189,7 @@
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -397,7 +397,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -487,7 +487,7 @@
|
||||
"y": 21
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -663,7 +663,7 @@
|
||||
"y": 28
|
||||
},
|
||||
"id": 5,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -807,7 +807,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -858,7 +858,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -909,7 +909,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -960,7 +960,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1011,7 +1011,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1062,7 +1062,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1113,7 +1113,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1164,7 +1164,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -0,0 +1,404 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\"} * node:windows_node_num_cpu:sum{cluster=\"$cluster\"} / scalar(sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"}))",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "CPU Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_memory_utilisation:ratio{cluster=\"$cluster\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 7
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Saturation (Swap I/O Pages)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(node:windows_node:sum{cluster=\"$cluster\"})",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Disk IO Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Net Utilisation (Transmitted)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 21
|
||||
},
|
||||
"id": 6,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Net Utilisation (Dropped)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 7,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum by (instance)(node:windows_node_filesystem_usage:{cluster=\"$cluster\"})",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Disk Capacity",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "USE Method / Cluster(Windows)",
|
||||
"uid": "53a43377ec9aaf2ff64dfc7a1f539334"
|
||||
}
|
@ -0,0 +1,615 @@
|
||||
{
|
||||
"editable": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Utilisation"
|
||||
}
|
||||
],
|
||||
"title": "CPU Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum by (core) (irate(windows_cpu_time_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", mode!=\"idle\", instance=\"$instance\"}[$__rate_interval]))",
|
||||
"legendFormat": "{{core}}"
|
||||
}
|
||||
],
|
||||
"title": "CPU Usage Per Core",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_memory_utilisation:{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Memory"
|
||||
}
|
||||
],
|
||||
"title": "Memory Utilisation %",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "bytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 7
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(\n windows_os_visible_memory_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}\n - windows_memory_available_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}\n)\n",
|
||||
"legendFormat": "memory used"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(node:windows_node_memory_totalCached_bytes:sum{cluster=\"$cluster\", instance=\"$instance\"})",
|
||||
"legendFormat": "memory cached"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(windows_memory_available_bytes{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"})",
|
||||
"legendFormat": "memory free"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 7
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Swap IO"
|
||||
}
|
||||
],
|
||||
"title": "Memory Saturation (Swap I/O) Pages",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 6,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Utilisation"
|
||||
}
|
||||
],
|
||||
"title": "Disk IO Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/io time/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 14
|
||||
},
|
||||
"id": 7,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(rate(windows_logical_disk_read_bytes_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
|
||||
"legendFormat": "read"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(rate(windows_logical_disk_write_bytes_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
|
||||
"legendFormat": "written"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "max(rate(windows_logical_disk_read_seconds_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]) + rate(windows_logical_disk_write_seconds_total{cluster=\"$cluster\", job=\"kubernetes-windows-exporter\", instance=\"$instance\"}[$__rate_interval]))",
|
||||
"legendFormat": "io time"
|
||||
}
|
||||
],
|
||||
"title": "Disk IO",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 8,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "{{volume}}"
|
||||
}
|
||||
],
|
||||
"title": "Disk Utilisation",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 28
|
||||
},
|
||||
"id": 9,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Utilisation"
|
||||
}
|
||||
],
|
||||
"title": "Net Utilisation (Transmitted)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 28
|
||||
},
|
||||
"id": 10,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"asTable": true,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}",
|
||||
"legendFormat": "Saturation"
|
||||
}
|
||||
],
|
||||
"title": "Net Saturation (Dropped)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"kubernetes"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"label": "cluster",
|
||||
"name": "cluster",
|
||||
"query": "label_values(up{job=\"kubernetes-windows-exporter\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "instance",
|
||||
"name": "instance",
|
||||
"query": "label_values(windows_system_system_up_time{cluster=\"$cluster\"}, instance)",
|
||||
"refresh": 2,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "USE Method / Node(Windows)",
|
||||
"uid": "96e7484b0bb53b74fbc2bcb7723cd40b"
|
||||
}
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -70,7 +70,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -105,7 +105,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -140,7 +140,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -175,7 +175,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -210,7 +210,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -261,7 +261,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -312,7 +312,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -363,7 +363,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -414,7 +414,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -473,7 +473,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -532,7 +532,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -583,7 +583,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -634,7 +634,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -685,7 +685,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -736,7 +736,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -787,7 +787,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -838,7 +838,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -889,7 +889,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -940,7 +940,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1015,15 +1015,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
|
||||
"legendFormat": "{{instance}} {{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, le))",
|
||||
"legendFormat": "{{instance}} {{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Request duration 99th quantile",
|
||||
@ -1066,7 +1066,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1117,7 +1117,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -1168,7 +1168,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -53,7 +53,7 @@
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -106,7 +106,7 @@
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -177,7 +177,7 @@
|
||||
"y": 9
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -316,7 +316,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -362,7 +362,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -408,7 +408,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -454,7 +454,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -500,7 +500,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -546,7 +546,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -39,7 +39,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -78,7 +78,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -149,7 +149,7 @@
|
||||
"y": 9
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -335,7 +335,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -386,7 +386,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -437,7 +437,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -488,7 +488,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -539,7 +539,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -590,7 +590,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -641,7 +641,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -692,7 +692,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -50,7 +50,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -112,7 +112,7 @@
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -162,7 +162,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -224,7 +224,7 @@
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1m",
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -53,7 +53,7 @@
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -106,7 +106,7 @@
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -152,7 +152,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -198,7 +198,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -244,7 +244,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -290,7 +290,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -336,7 +336,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -382,7 +382,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -86,7 +86,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -137,7 +137,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -188,7 +188,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -239,7 +239,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -290,7 +290,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -365,15 +365,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Post Request Latency 99th Quantile",
|
||||
@ -416,15 +416,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Get Request Latency 99th Quantile",
|
||||
@ -467,7 +467,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -518,7 +518,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -569,7 +569,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -35,7 +35,7 @@
|
||||
"options": {
|
||||
"colorMode": "none"
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -86,7 +86,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -161,7 +161,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -236,7 +236,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -311,15 +311,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Post Request Latency 99th Quantile",
|
||||
@ -362,15 +362,15 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
|
||||
"legendFormat": "{{verb}} {{url}}"
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))",
|
||||
"legendFormat": "{{verb}}"
|
||||
}
|
||||
],
|
||||
"title": "Get Request Latency 99th Quantile",
|
||||
@ -413,7 +413,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -464,7 +464,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -515,7 +515,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -39,7 +39,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -78,7 +78,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -117,7 +117,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -156,7 +156,7 @@
|
||||
"displayMode": "basic",
|
||||
"showUnfilled": false
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -207,7 +207,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -258,7 +258,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -309,7 +309,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -360,7 +360,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -411,7 +411,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@ -462,7 +462,7 @@
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v11.1.0",
|
||||
"pluginVersion": "v11.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -18,8 +18,8 @@
|
||||
"subdir": "contrib/mixin"
|
||||
}
|
||||
},
|
||||
"version": "2b323071a8bd4f02ddaf63e0dfa1fd98c221dccb",
|
||||
"sum": "IXI3LQIT9NmTPJAk8WLUJd5+qZfcGpeNCyWIK7oEpws="
|
||||
"version": "f30cbaac111aa01a310fe75360c759cdd4d9cd14",
|
||||
"sum": "XmXkOCriQIZmXwlIIFhqlJMa0e6qGWdxZD+ZDYaN0Po="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -41,16 +41,6 @@
|
||||
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
|
||||
"sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib.git",
|
||||
"subdir": "grafonnet-7.0"
|
||||
}
|
||||
},
|
||||
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
|
||||
"sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
@ -58,8 +48,8 @@
|
||||
"subdir": "gen/grafonnet-latest"
|
||||
}
|
||||
},
|
||||
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
|
||||
"sum": "64fMUPI3frXGj4X1FqFd1t7r04w3CUSmXaDcJ23EYbQ="
|
||||
"version": "d20e609202733790caf5b554c9945d049f243ae3",
|
||||
"sum": "V9vAj21qJOc2DlMPDgB1eEjSQU4A+sAA4AXuJ6bd4xc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -68,18 +58,18 @@
|
||||
"subdir": "gen/grafonnet-v10.0.0"
|
||||
}
|
||||
},
|
||||
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
|
||||
"version": "d20e609202733790caf5b554c9945d049f243ae3",
|
||||
"sum": "xdcrJPJlpkq4+5LpGwN4tPAuheNNLXZjE6tDcyvFjr0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet.git",
|
||||
"subdir": "gen/grafonnet-v11.1.0"
|
||||
"subdir": "gen/grafonnet-v11.4.0"
|
||||
}
|
||||
},
|
||||
"version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
|
||||
"sum": "41w7p/rwrNsITqNHMXtGSJAfAyKmnflg6rFhKBduUxM="
|
||||
"version": "d20e609202733790caf5b554c9945d049f243ae3",
|
||||
"sum": "aVAX09paQYNOoCSKVpuk1exVIyBoMt/C50QJI+Q/3nA="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -88,7 +78,7 @@
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "767befa8fb46a07be516dec2777d7d89909a529d",
|
||||
"version": "d6c38bb26f576b128cadca4137d73a037afdd872",
|
||||
"sum": "yxqWcq/N3E/a/XreeU6EuE6X7kYPnG0AspAQFKOjASo="
|
||||
},
|
||||
{
|
||||
@ -108,8 +98,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "63d430b69a95741061c2f7fc9d84b1a778511d9c",
|
||||
"sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE="
|
||||
"version": "1199b50e9d2ff53d4bb5fb2304ad1fb69d38e609",
|
||||
"sum": "LfbgcJbilu4uBdKYZSvmkoOTPwEAzg10L3/VqKAIWtA="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -118,8 +108,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "a3fbf21977deb89b7d843eb8371170c011ea6835",
|
||||
"sum": "57zW2IGJ9zbYd8BI0qe6JkoWTRSMNiBUWC6+YcnEsWo="
|
||||
"version": "e27267571be06c2bdc3d2fd8dbd70161cd709cb4",
|
||||
"sum": "je1RPCp2aFNefYs5Q57Q5wDm93p8pL4pdBtA5rC7jLA="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -128,8 +118,8 @@
|
||||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "32e7727ff4613b0f55dfc18aff15afb8c04d03c5",
|
||||
"sum": "lO7jUSzAIy8Yk9pOWJIWgPRhubkWzVh56W6wtYfbVH4="
|
||||
"version": "2a95d4649b2fea55799032fb9c0b571c4ba7f776",
|
||||
"sum": "3bioG7CfTfY9zeu5xU4yon6Zt3kYvNkyl492nOhQxnM="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -138,7 +128,7 @@
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "32e7727ff4613b0f55dfc18aff15afb8c04d03c5",
|
||||
"version": "2a95d4649b2fea55799032fb9c0b571c4ba7f776",
|
||||
"sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c="
|
||||
},
|
||||
{
|
||||
@ -148,8 +138,8 @@
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "7e5a571a3fb735c78e17c76a637eb7e8bb5dd086",
|
||||
"sum": "uTw/Mj+X91S+oqUpAX81xcfWPDlox0tdSZY/YBw7nGE="
|
||||
"version": "1eea946a1532f1e8cccfceea98d907bf3a10b1d9",
|
||||
"sum": "17LhiwefVfoNDsF3DcFZw/UL4PMU7YpNNUaOdaYd1gE="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -158,7 +148,7 @@
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "a366602bacb2c8d773a9cee058b6971b8d2e3732",
|
||||
"version": "465bcbaf2a727c942e7f923aacfb9dff9af8d4a1",
|
||||
"sum": "gi+knjdxs2T715iIQIntrimbHRgHnpM8IFBJDD1gYfs=",
|
||||
"name": "prometheus-operator-mixin"
|
||||
},
|
||||
@ -169,8 +159,8 @@
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "a366602bacb2c8d773a9cee058b6971b8d2e3732",
|
||||
"sum": "z0/lCiMusMHTqntsosMVGYkVcSZjCpyZBmUMVUsK5nA="
|
||||
"version": "465bcbaf2a727c942e7f923aacfb9dff9af8d4a1",
|
||||
"sum": "LctDdofQostvviE5y8vpRKWGGO1ZKO3dgJe7P9xifW0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -179,7 +169,7 @@
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "0f65e8fa5fc72d2678655105c0213b416ca6f34c",
|
||||
"version": "b5d1a64ad5bb0ff879705714d1e40cea82efbd5c",
|
||||
"sum": "Mf4h1BYLle2nrgjf/HXrBbl0Zk8N+xaoEM017o0BC+k=",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
@ -190,8 +180,8 @@
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "cf8c6891cc610e54f70383addd4bb6079f0add35",
|
||||
"sum": "cQCW+1N0Xae5yXecCWDK2oAlN0luBS/5GrwBYSlaFms="
|
||||
"version": "11365f97bef6cb0e6259d536a7e21c49e3f5c065",
|
||||
"sum": "xYj6VYFT/eafsbleNlC+Z2VfLy1CndyYrJs9BcTmnX8="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -200,8 +190,8 @@
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "b407c2930da4f50c0d17fc39404c6302a9eb740b",
|
||||
"sum": "OYT5u3S8DbamuJV/v3gbWSteOvFzMeNwMj+u4Apk7jM=",
|
||||
"version": "509b978f0d675b4c9b3ccf8c0fc06961b0f03e8f",
|
||||
"sum": "2c+wttfee9TwuQJZIkNV7Tekem74Qgc7iZ842P28rNw=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
@ -222,7 +212,7 @@
|
||||
"subdir": "mixin"
|
||||
}
|
||||
},
|
||||
"version": "7037331e6ea7dbe85a1b7af37bf8ea277a80663d",
|
||||
"version": "346d18bb0f8011c63d7106de494cf3b9253161a1",
|
||||
"sum": "ieCD4eMgGbOlrI8GmckGPHBGQDcLasE1rULYq56W/bs=",
|
||||
"name": "thanos-mixin"
|
||||
}
|
||||
|
@ -6,5 +6,5 @@ dashboards:
|
||||
url: https://grafana.com/api/dashboards/9578/revisions/4/download
|
||||
tags: []
|
||||
- name: Prometheus
|
||||
url: https://grafana.com/api/dashboards/3662/revisions/2/download
|
||||
url: https://grafana.com/api/dashboards/19105/revisions/7/download
|
||||
tags: []
|
||||
|
@ -7,7 +7,7 @@
|
||||
"app.kubernetes.io/instance": "main",
|
||||
"app.kubernetes.io/name": "alertmanager",
|
||||
"app.kubernetes.io/part-of": "kube-prometheus",
|
||||
"app.kubernetes.io/version": "0.27.0",
|
||||
"app.kubernetes.io/version": "0.28.0",
|
||||
"prometheus": "k8s",
|
||||
"role": "alert-rules"
|
||||
},
|
||||
|
@ -20,9 +20,9 @@
|
||||
"summary": "etcd cluster members are down."
|
||||
},
|
||||
"expr": "max without (endpoint) (\n sum without (instance, pod) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count without (To) (\n sum without (instance, pod) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s])) > 0.01\n )\n)\n> 0\n",
|
||||
"for": "10m",
|
||||
"for": "20m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -6,7 +6,7 @@
|
||||
"app.kubernetes.io/component": "exporter",
|
||||
"app.kubernetes.io/name": "kube-state-metrics",
|
||||
"app.kubernetes.io/part-of": "kube-prometheus",
|
||||
"app.kubernetes.io/version": "2.14.0",
|
||||
"app.kubernetes.io/version": "2.15.0",
|
||||
"prometheus": "k8s",
|
||||
"role": "alert-rules"
|
||||
},
|
||||
|
@ -19,7 +19,7 @@
|
||||
{
|
||||
"alert": "KubePodCrashLooping",
|
||||
"annotations": {
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: \"CrashLoopBackOff\").",
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: \"CrashLoopBackOff\") on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping",
|
||||
"summary": "Pod is crash looping."
|
||||
},
|
||||
@ -32,7 +32,7 @@
|
||||
{
|
||||
"alert": "KubePodNotReady",
|
||||
"annotations": {
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.",
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready",
|
||||
"summary": "Pod has been in a non-ready state for more than 15 minutes."
|
||||
},
|
||||
@ -45,7 +45,7 @@
|
||||
{
|
||||
"alert": "KubeDeploymentGenerationMismatch",
|
||||
"annotations": {
|
||||
"description": "Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.",
|
||||
"description": "Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch",
|
||||
"summary": "Deployment generation mismatch due to possible roll-back"
|
||||
},
|
||||
@ -58,7 +58,7 @@
|
||||
{
|
||||
"alert": "KubeDeploymentReplicasMismatch",
|
||||
"annotations": {
|
||||
"description": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"description": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch",
|
||||
"summary": "Deployment has not matched the expected number of replicas."
|
||||
},
|
||||
@ -71,7 +71,7 @@
|
||||
{
|
||||
"alert": "KubeDeploymentRolloutStuck",
|
||||
"annotations": {
|
||||
"description": "Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes.",
|
||||
"description": "Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck",
|
||||
"summary": "Deployment rollout is not progressing."
|
||||
},
|
||||
@ -84,7 +84,7 @@
|
||||
{
|
||||
"alert": "KubeStatefulSetReplicasMismatch",
|
||||
"annotations": {
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch",
|
||||
"summary": "StatefulSet has not matched the expected number of replicas."
|
||||
},
|
||||
@ -97,7 +97,7 @@
|
||||
{
|
||||
"alert": "KubeStatefulSetGenerationMismatch",
|
||||
"annotations": {
|
||||
"description": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.",
|
||||
"description": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch",
|
||||
"summary": "StatefulSet generation mismatch due to possible roll-back"
|
||||
},
|
||||
@ -110,7 +110,7 @@
|
||||
{
|
||||
"alert": "KubeStatefulSetUpdateNotRolledOut",
|
||||
"annotations": {
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.",
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout",
|
||||
"summary": "StatefulSet update has not been rolled out."
|
||||
},
|
||||
@ -123,7 +123,7 @@
|
||||
{
|
||||
"alert": "KubeDaemonSetRolloutStuck",
|
||||
"annotations": {
|
||||
"description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m.",
|
||||
"description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck",
|
||||
"summary": "DaemonSet rollout is stuck."
|
||||
},
|
||||
@ -136,7 +136,7 @@
|
||||
{
|
||||
"alert": "KubeContainerWaiting",
|
||||
"annotations": {
|
||||
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: \"{{ $labels.reason }}\").",
|
||||
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: \"{{ $labels.reason }}\") on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting",
|
||||
"summary": "Pod container waiting longer than 1 hour"
|
||||
},
|
||||
@ -149,7 +149,7 @@
|
||||
{
|
||||
"alert": "KubeDaemonSetNotScheduled",
|
||||
"annotations": {
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.",
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled",
|
||||
"summary": "DaemonSet pods are not scheduled."
|
||||
},
|
||||
@ -162,7 +162,7 @@
|
||||
{
|
||||
"alert": "KubeDaemonSetMisScheduled",
|
||||
"annotations": {
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.",
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled",
|
||||
"summary": "DaemonSet pods are misscheduled."
|
||||
},
|
||||
@ -175,7 +175,7 @@
|
||||
{
|
||||
"alert": "KubeJobNotCompleted",
|
||||
"annotations": {
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete.",
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ \"43200\" | humanizeDuration }} to complete on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted",
|
||||
"summary": "Job did not complete in time"
|
||||
},
|
||||
@ -187,7 +187,7 @@
|
||||
{
|
||||
"alert": "KubeJobFailed",
|
||||
"annotations": {
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert.",
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed",
|
||||
"summary": "Job failed to complete."
|
||||
},
|
||||
@ -200,7 +200,7 @@
|
||||
{
|
||||
"alert": "KubeHpaReplicasMismatch",
|
||||
"annotations": {
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes.",
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch",
|
||||
"summary": "HPA has not matched desired number of replicas."
|
||||
},
|
||||
@ -213,7 +213,7 @@
|
||||
{
|
||||
"alert": "KubeHpaMaxedOut",
|
||||
"annotations": {
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes.",
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout",
|
||||
"summary": "HPA is running at max replicas"
|
||||
},
|
||||
@ -257,7 +257,7 @@
|
||||
{
|
||||
"alert": "KubeQuotaAlmostFull",
|
||||
"annotations": {
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull",
|
||||
"summary": "Namespace quota is going to be full."
|
||||
},
|
||||
@ -270,7 +270,7 @@
|
||||
{
|
||||
"alert": "KubeQuotaFullyUsed",
|
||||
"annotations": {
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused",
|
||||
"summary": "Namespace quota is fully used."
|
||||
},
|
||||
@ -283,7 +283,7 @@
|
||||
{
|
||||
"alert": "KubeQuotaExceeded",
|
||||
"annotations": {
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded",
|
||||
"summary": "Namespace quota has exceeded the limits."
|
||||
},
|
||||
@ -296,7 +296,7 @@
|
||||
{
|
||||
"alert": "CPUThrottlingHigh",
|
||||
"annotations": {
|
||||
"description": "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.",
|
||||
"description": "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh",
|
||||
"summary": "Processes experience elevated CPU throttling."
|
||||
},
|
||||
@ -396,7 +396,7 @@
|
||||
{
|
||||
"alert": "KubeVersionMismatch",
|
||||
"annotations": {
|
||||
"description": "There are {{ $value }} different semantic versions of Kubernetes components running.",
|
||||
"description": "There are {{ $value }} different semantic versions of Kubernetes components running on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch",
|
||||
"summary": "Different semantic versions of Kubernetes components running."
|
||||
},
|
||||
@ -409,7 +409,7 @@
|
||||
{
|
||||
"alert": "KubeClientErrors",
|
||||
"annotations": {
|
||||
"description": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'",
|
||||
"description": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors",
|
||||
"summary": "Kubernetes API server client is experiencing errors."
|
||||
},
|
||||
@ -427,7 +427,7 @@
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
@ -442,7 +442,7 @@
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
@ -457,7 +457,7 @@
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
@ -472,7 +472,7 @@
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"description": "The API server is burning too much error budget on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
@ -518,11 +518,12 @@
|
||||
{
|
||||
"alert": "KubeAggregatedAPIErrors",
|
||||
"annotations": {
|
||||
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
|
||||
"description": "Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors",
|
||||
"summary": "Kubernetes aggregated API has reported errors."
|
||||
},
|
||||
"expr": "sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job=\"apiserver\"}[10m])) > 4\n",
|
||||
"expr": "sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job=\"apiserver\"}[1m])) > 0\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -530,7 +531,7 @@
|
||||
{
|
||||
"alert": "KubeAggregatedAPIDown",
|
||||
"annotations": {
|
||||
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
|
||||
"description": "Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown",
|
||||
"summary": "Kubernetes aggregated API is down."
|
||||
},
|
||||
@ -556,7 +557,7 @@
|
||||
{
|
||||
"alert": "KubeAPITerminatedRequests",
|
||||
"annotations": {
|
||||
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.",
|
||||
"description": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests",
|
||||
"summary": "The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests."
|
||||
},
|
||||
@ -574,11 +575,11 @@
|
||||
{
|
||||
"alert": "KubeNodeNotReady",
|
||||
"annotations": {
|
||||
"description": "{{ $labels.node }} has been unready for more than 15 minutes.",
|
||||
"description": "{{ $labels.node }} has been unready for more than 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready",
|
||||
"summary": "Node is not ready."
|
||||
},
|
||||
"expr": "kube_node_status_condition{job=\"kube-state-metrics\",condition=\"Ready\",status=\"true\"} == 0\n",
|
||||
"expr": "kube_node_status_condition{job=\"kube-state-metrics\",condition=\"Ready\",status=\"true\"} == 0\nand on (cluster, node)\nkube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -587,7 +588,7 @@
|
||||
{
|
||||
"alert": "KubeNodeUnreachable",
|
||||
"annotations": {
|
||||
"description": "{{ $labels.node }} is unreachable and some workloads may be rescheduled.",
|
||||
"description": "{{ $labels.node }} is unreachable and some workloads may be rescheduled on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable",
|
||||
"summary": "Node is unreachable."
|
||||
},
|
||||
@ -600,11 +601,11 @@
|
||||
{
|
||||
"alert": "KubeletTooManyPods",
|
||||
"annotations": {
|
||||
"description": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
|
||||
"description": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods",
|
||||
"summary": "Kubelet is running at capacity."
|
||||
},
|
||||
"expr": "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n",
|
||||
"expr": "(\n max by (cluster, instance) (\n kubelet_running_pods{job=\"kubelet\", metrics_path=\"/metrics\"} > 1\n )\n * on (cluster, instance) group_left(node)\n max by (cluster, instance, node) (\n kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"}\n )\n)\n/ on (cluster, node) group_left()\nmax by (cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"pods\"} != 1\n) > 0.95\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "info"
|
||||
@ -613,11 +614,11 @@
|
||||
{
|
||||
"alert": "KubeNodeReadinessFlapping",
|
||||
"annotations": {
|
||||
"description": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.",
|
||||
"description": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping",
|
||||
"summary": "Node readiness status is flapping."
|
||||
},
|
||||
"expr": "sum(changes(kube_node_status_condition{job=\"kube-state-metrics\",status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2\n",
|
||||
"expr": "sum(changes(kube_node_status_condition{job=\"kube-state-metrics\",status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2\nand on (cluster, node)\nkube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -626,7 +627,7 @@
|
||||
{
|
||||
"alert": "KubeletPlegDurationHigh",
|
||||
"annotations": {
|
||||
"description": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"description": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh",
|
||||
"summary": "Kubelet Pod Lifecycle Event Generator is taking too long to relist."
|
||||
},
|
||||
@ -639,7 +640,7 @@
|
||||
{
|
||||
"alert": "KubeletPodStartUpLatencyHigh",
|
||||
"annotations": {
|
||||
"description": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"description": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh",
|
||||
"summary": "Kubelet Pod startup latency is too high."
|
||||
},
|
||||
@ -652,7 +653,7 @@
|
||||
{
|
||||
"alert": "KubeletClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration",
|
||||
"summary": "Kubelet client certificate is about to expire."
|
||||
},
|
||||
@ -664,7 +665,7 @@
|
||||
{
|
||||
"alert": "KubeletClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration",
|
||||
"summary": "Kubelet client certificate is about to expire."
|
||||
},
|
||||
@ -676,7 +677,7 @@
|
||||
{
|
||||
"alert": "KubeletServerCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration",
|
||||
"summary": "Kubelet server certificate is about to expire."
|
||||
},
|
||||
@ -688,7 +689,7 @@
|
||||
{
|
||||
"alert": "KubeletServerCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration",
|
||||
"summary": "Kubelet server certificate is about to expire."
|
||||
},
|
||||
@ -700,7 +701,7 @@
|
||||
{
|
||||
"alert": "KubeletClientCertificateRenewalErrors",
|
||||
"annotations": {
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes).",
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors",
|
||||
"summary": "Kubelet has failed to renew its client certificate."
|
||||
},
|
||||
@ -713,7 +714,7 @@
|
||||
{
|
||||
"alert": "KubeletServerCertificateRenewalErrors",
|
||||
"annotations": {
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).",
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors",
|
||||
"summary": "Kubelet has failed to renew its server certificate."
|
||||
},
|
||||
@ -809,25 +810,25 @@
|
||||
"record": "cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le=\"+Inf\"} * 24 * 30)\n",
|
||||
"expr": "sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le=\"+Inf\"})\n",
|
||||
"record": "cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
|
||||
"expr": "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n",
|
||||
"labels": {
|
||||
"verb": "all"
|
||||
},
|
||||
"record": "apiserver_request:availability30d"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
|
||||
"expr": "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:availability30d"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
|
||||
"expr": "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
@ -869,98 +870,98 @@
|
||||
"name": "kube-apiserver-burnrate.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate2h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate30m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate3d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate5m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate6h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate2h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate30m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate3d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate5m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
|
||||
"expr": "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
@ -1237,6 +1238,144 @@
|
||||
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "windows.node.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "count by (cluster) (\n windows_system_system_up_time{job=\"kubernetes-windows-exporter\"}\n)\n",
|
||||
"record": "node:windows_node:sum"
|
||||
},
|
||||
{
|
||||
"expr": "count by (cluster, instance) (sum by (cluster, instance, core) (\n windows_cpu_time_total{job=\"kubernetes-windows-exporter\"}\n))\n",
|
||||
"record": "node:windows_node_num_cpu:sum"
|
||||
},
|
||||
{
|
||||
"expr": "1 - avg by (cluster) (rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m]))\n",
|
||||
"record": ":windows_node_cpu_utilisation:avg1m"
|
||||
},
|
||||
{
|
||||
"expr": "1 - avg by (cluster, instance) (\n rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m])\n)\n",
|
||||
"record": "node:windows_node_cpu_utilisation:avg1m"
|
||||
},
|
||||
{
|
||||
"expr": "1 -\nsum by (cluster) (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n/\nsum by (cluster) (windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": ":windows_node_memory_utilisation:"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster) (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_cache_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": ":windows_node_memory_MemFreeCached_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "(windows_memory_cache_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_modified_page_list_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_core_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_normal_priority_bytes{job=\"kubernetes-windows-exporter\"} + windows_memory_standby_cache_reserve_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": "node:windows_node_memory_totalCached_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster) (windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": ":windows_node_memory_MemTotal_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, instance) (\n (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n)\n",
|
||||
"record": "node:windows_node_memory_bytes_available:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, instance) (\n windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"}\n)\n",
|
||||
"record": "node:windows_node_memory_bytes_total:sum"
|
||||
},
|
||||
{
|
||||
"expr": "(node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum)\n/\nscalar(sum(node:windows_node_memory_bytes_total:sum))\n",
|
||||
"record": "node:windows_node_memory_utilisation:ratio"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)\n",
|
||||
"record": "node:windows_node_memory_utilisation:"
|
||||
},
|
||||
{
|
||||
"expr": "irate(windows_memory_swap_page_operations_total{job=\"kubernetes-windows-exporter\"}[5m])\n",
|
||||
"record": "node:windows_node_memory_swap_io_pages:irate"
|
||||
},
|
||||
{
|
||||
"expr": "avg by (cluster) (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m])\n )\n",
|
||||
"record": ":windows_node_disk_utilisation:avg_irate"
|
||||
},
|
||||
{
|
||||
"expr": "avg by (cluster, instance) (\n (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
|
||||
"record": "node:windows_node_disk_utilisation:avg_irate"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster,instance,volume)(\n (windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n- windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"})\n/ windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n)\n",
|
||||
"record": "node:windows_node_filesystem_usage:"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster, instance, volume) (windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"} / windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"})\n",
|
||||
"record": "node:windows_node_filesystem_avail:"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster) (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n",
|
||||
"record": ":windows_node_net_utilisation:sum_irate"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, instance) (\n (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
|
||||
"record": "node:windows_node_net_utilisation:sum_irate"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster) (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m])) +\nsum by (cluster) (irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n",
|
||||
"record": ":windows_node_net_saturation:sum_irate"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, instance) (\n (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n",
|
||||
"record": "node:windows_node_net_saturation:sum_irate"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "windows.pod.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "windows_container_available{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_pod_container_available"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_cpu_usage_seconds_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_total_runtime"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_memory_usage_commit_bytes{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_memory_usage"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_memory_usage_private_working_set_bytes{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_private_working_set_usage"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_network_receive_bytes_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_network_received_bytes_total"
|
||||
},
|
||||
{
|
||||
"expr": "windows_container_network_transmit_bytes_total{job=\"kubernetes-windows-exporter\", container_id != \"\"} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace, cluster)\n",
|
||||
"record": "windows_container_network_transmitted_bytes_total"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
|
||||
"record": "kube_pod_windows_container_resource_memory_request"
|
||||
},
|
||||
{
|
||||
"expr": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
|
||||
"record": "kube_pod_windows_container_resource_memory_limit"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
|
||||
"record": "kube_pod_windows_container_resource_cpu_cores_request"
|
||||
},
|
||||
{
|
||||
"expr": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on(container,pod,namespace,cluster) (windows_pod_container_available)\n",
|
||||
"record": "kube_pod_windows_container_resource_cpu_cores_limit"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, namespace, pod, container) (\n rate(windows_container_total_runtime{}[5m])\n)\n",
|
||||
"record": "namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -151,7 +151,7 @@
|
||||
{
|
||||
"alert": "NodeHighNumberConntrackEntriesUsed",
|
||||
"annotations": {
|
||||
"description": "{{ $value | humanizePercentage }} of conntrack entries are used.",
|
||||
"description": "{{ $labels.instance }} {{ $value | humanizePercentage }} of conntrack entries are used.",
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused",
|
||||
"summary": "Number of conntrack entries is getting close to the limit."
|
||||
},
|
||||
@ -256,7 +256,7 @@
|
||||
"runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage",
|
||||
"summary": "High CPU usage."
|
||||
},
|
||||
"expr": "sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\"}[2m]))) * 100 > 90\n",
|
||||
"expr": "sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode!~\"idle|iowait\"}[2m]))) * 100 > 90\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "info"
|
||||
|
@ -6,7 +6,7 @@
|
||||
"app.kubernetes.io/component": "controller",
|
||||
"app.kubernetes.io/name": "prometheus-operator",
|
||||
"app.kubernetes.io/part-of": "kube-prometheus",
|
||||
"app.kubernetes.io/version": "0.78.2",
|
||||
"app.kubernetes.io/version": "0.80.0",
|
||||
"prometheus": "k8s",
|
||||
"role": "alert-rules"
|
||||
},
|
||||
|
@ -7,7 +7,7 @@
|
||||
"app.kubernetes.io/instance": "k8s",
|
||||
"app.kubernetes.io/name": "prometheus",
|
||||
"app.kubernetes.io/part-of": "kube-prometheus",
|
||||
"app.kubernetes.io/version": "3.0.1",
|
||||
"app.kubernetes.io/version": "3.1.0",
|
||||
"prometheus": "k8s",
|
||||
"role": "alert-rules"
|
||||
},
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -14,9 +14,9 @@ spec:
|
||||
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).'
|
||||
summary: etcd cluster members are down.
|
||||
expr: "max without (endpoint) (\n sum without (instance, pod) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count without (To) (\n sum without (instance, pod) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s])) > 0.01\n )\n)\n> 0\n"
|
||||
for: 10m
|
||||
for: 20m
|
||||
labels:
|
||||
severity: critical
|
||||
severity: warning
|
||||
- alert: etcdInsufficientMembers
|
||||
annotations:
|
||||
description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
|
||||
|
@ -11,7 +11,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").'
|
||||
description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff") on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
|
||||
summary: Pod is crash looping.
|
||||
expr: 'max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics"}[5m]) >= 1
|
||||
@ -22,7 +22,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
|
||||
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
|
||||
summary: Pod has been in a non-ready state for more than 15 minutes.
|
||||
expr: "sum by (namespace, pod, cluster) (\n max by(namespace, pod, cluster) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown|Failed\"}\n ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (\n 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n"
|
||||
@ -31,7 +31,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentGenerationMismatch
|
||||
annotations:
|
||||
description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
|
||||
description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
|
||||
summary: Deployment generation mismatch due to possible roll-back
|
||||
expr: "kube_deployment_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_deployment_metadata_generation{job=\"kube-state-metrics\"}\n"
|
||||
@ -40,7 +40,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
annotations:
|
||||
description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
|
||||
description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
|
||||
summary: Deployment has not matched the expected number of replicas.
|
||||
expr: "(\n kube_deployment_spec_replicas{job=\"kube-state-metrics\"}\n >\n kube_deployment_status_replicas_available{job=\"kube-state-metrics\"}\n) and (\n changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\"}[10m])\n ==\n 0\n)\n"
|
||||
@ -49,7 +49,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentRolloutStuck
|
||||
annotations:
|
||||
description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes.
|
||||
description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
|
||||
summary: Deployment rollout is not progressing.
|
||||
expr: 'kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics"}
|
||||
@ -62,7 +62,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetReplicasMismatch
|
||||
annotations:
|
||||
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
|
||||
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
|
||||
summary: StatefulSet has not matched the expected number of replicas.
|
||||
expr: "(\n kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas{job=\"kube-state-metrics\"}\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[10m])\n ==\n 0\n)\n"
|
||||
@ -71,7 +71,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetGenerationMismatch
|
||||
annotations:
|
||||
description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
|
||||
description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
|
||||
summary: StatefulSet generation mismatch due to possible roll-back
|
||||
expr: "kube_statefulset_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_statefulset_metadata_generation{job=\"kube-state-metrics\"}\n"
|
||||
@ -80,7 +80,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||
annotations:
|
||||
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
|
||||
description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
|
||||
summary: StatefulSet update has not been rolled out.
|
||||
expr: "(\n max by(namespace, statefulset, job, cluster) (\n kube_statefulset_status_current_revision{job=\"kube-state-metrics\"}\n unless\n kube_statefulset_status_update_revision{job=\"kube-state-metrics\"}\n )\n *\n (\n kube_statefulset_replicas{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
|
||||
@ -89,7 +89,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m.
|
||||
description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
|
||||
summary: DaemonSet rollout is stuck.
|
||||
expr: "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n"
|
||||
@ -98,7 +98,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. (reason: "{{`{{`}} $labels.reason {{`}}`}}").'
|
||||
description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container {{`}}`}} has been in waiting state for longer than 1 hour (reason: "{{`{{`}} $labels.reason {{`}}`}}") on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
|
||||
summary: Pod container waiting longer than 1 hour
|
||||
expr: 'kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="kube-state-metrics"} > 0
|
||||
@ -109,7 +109,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
|
||||
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
|
||||
summary: DaemonSet pods are not scheduled.
|
||||
expr: "kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n -\nkube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"} > 0\n"
|
||||
@ -118,7 +118,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetMisScheduled
|
||||
annotations:
|
||||
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
|
||||
description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
|
||||
summary: DaemonSet pods are misscheduled.
|
||||
expr: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||
@ -129,7 +129,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeJobNotCompleted
|
||||
annotations:
|
||||
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.
|
||||
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
|
||||
summary: Job did not complete in time
|
||||
expr: "time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\"}\n and\nkube_job_status_active{job=\"kube-state-metrics\"} > 0) > 43200\n"
|
||||
@ -137,7 +137,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeJobFailed
|
||||
annotations:
|
||||
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.
|
||||
description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
|
||||
summary: Job failed to complete.
|
||||
expr: 'kube_job_failed{job="kube-state-metrics"} > 0
|
||||
@ -148,7 +148,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeHpaReplicasMismatch
|
||||
annotations:
|
||||
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
|
||||
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
|
||||
summary: HPA has not matched desired number of replicas.
|
||||
expr: "(kube_horizontalpodautoscaler_status_desired_replicas{job=\"kube-state-metrics\"}\n !=\nkube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"})\n and\n(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n >\nkube_horizontalpodautoscaler_spec_min_replicas{job=\"kube-state-metrics\"})\n and\n(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n <\nkube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"})\n and\nchanges(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}[15m]) == 0\n"
|
||||
@ -157,7 +157,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeHpaMaxedOut
|
||||
annotations:
|
||||
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.
|
||||
description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
|
||||
summary: HPA is running at max replicas
|
||||
expr: "kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}\n ==\nkube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"}\n"
|
||||
@ -186,7 +186,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeQuotaAlmostFull
|
||||
annotations:
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
|
||||
summary: Namespace quota is going to be full.
|
||||
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 0.9 < 1\n"
|
||||
@ -195,7 +195,7 @@ spec:
|
||||
severity: info
|
||||
- alert: KubeQuotaFullyUsed
|
||||
annotations:
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
|
||||
summary: Namespace quota is fully used.
|
||||
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n == 1\n"
|
||||
@ -204,7 +204,7 @@ spec:
|
||||
severity: info
|
||||
- alert: KubeQuotaExceeded
|
||||
annotations:
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
|
||||
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
|
||||
summary: Namespace quota has exceeded the limits.
|
||||
expr: "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 1\n"
|
||||
@ -213,7 +213,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: CPUThrottlingHigh
|
||||
annotations:
|
||||
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.'
|
||||
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
|
||||
summary: Processes experience elevated CPU throttling.
|
||||
expr: "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)\n /\nsum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)\n > ( 25 / 100 )\n"
|
||||
@ -281,7 +281,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubeVersionMismatch
|
||||
annotations:
|
||||
description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
|
||||
description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch
|
||||
summary: Different semantic versions of Kubernetes components running.
|
||||
expr: 'count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1
|
||||
@ -292,7 +292,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeClientErrors
|
||||
annotations:
|
||||
description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.'
|
||||
description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
|
||||
summary: Kubernetes API server client is experiencing errors.
|
||||
expr: "(sum(rate(rest_client_requests_total{job=\"apiserver\",code=~\"5..\"}[5m])) by (cluster, instance, job, namespace)\n /\nsum(rate(rest_client_requests_total{job=\"apiserver\"}[5m])) by (cluster, instance, job, namespace))\n> 0.01\n"
|
||||
@ -303,7 +303,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: 'sum by(cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000)
|
||||
@ -320,7 +320,7 @@ spec:
|
||||
short: 5m
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: 'sum by(cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000)
|
||||
@ -337,7 +337,7 @@ spec:
|
||||
short: 30m
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: 'sum by(cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000)
|
||||
@ -354,7 +354,7 @@ spec:
|
||||
short: 2h
|
||||
- alert: KubeAPIErrorBudgetBurn
|
||||
annotations:
|
||||
description: The API server is burning too much error budget.
|
||||
description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn
|
||||
summary: The API server is burning too much error budget.
|
||||
expr: 'sum by(cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000)
|
||||
@ -403,17 +403,18 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeAggregatedAPIErrors
|
||||
annotations:
|
||||
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
|
||||
description: Kubernetes aggregated API {{`{{`}} $labels.instance {{`}}`}}/{{`{{`}} $labels.name {{`}}`}} has reported {{`{{`}} $labels.reason {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors
|
||||
summary: Kubernetes aggregated API has reported errors.
|
||||
expr: 'sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4
|
||||
expr: 'sum by(cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0
|
||||
|
||||
'
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeAggregatedAPIDown
|
||||
annotations:
|
||||
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
|
||||
description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown
|
||||
summary: Kubernetes aggregated API is down.
|
||||
expr: '(1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m]))) * 100 < 85
|
||||
@ -435,7 +436,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeAPITerminatedRequests
|
||||
annotations:
|
||||
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
|
||||
description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
|
||||
summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
|
||||
expr: 'sum by(cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum by(cluster) (rate(apiserver_request_total{job="apiserver"}[10m])) + sum by(cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
|
||||
@ -448,18 +449,22 @@ spec:
|
||||
rules:
|
||||
- alert: KubeNodeNotReady
|
||||
annotations:
|
||||
description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes.'
|
||||
description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready
|
||||
summary: Node is not ready.
|
||||
expr: 'kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
|
||||
|
||||
and on (cluster, node)
|
||||
|
||||
kube_node_spec_unschedulable{job="kube-state-metrics"} == 0
|
||||
|
||||
'
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeNodeUnreachable
|
||||
annotations:
|
||||
description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled.'
|
||||
description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable
|
||||
summary: Node is unreachable.
|
||||
expr: '(kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1
|
||||
@ -470,27 +475,31 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletTooManyPods
|
||||
annotations:
|
||||
description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.
|
||||
description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods
|
||||
summary: Kubelet is running at capacity.
|
||||
expr: "count by(cluster, node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\",resource=\"pods\"} != 1\n) > 0.95\n"
|
||||
expr: "(\n max by (cluster, instance) (\n kubelet_running_pods{job=\"kubelet\", metrics_path=\"/metrics\"} > 1\n )\n * on (cluster, instance) group_left(node)\n max by (cluster, instance, node) (\n kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"}\n )\n)\n/ on (cluster, node) group_left()\nmax by (cluster, node) (\n kube_node_status_capacity{job=\"kube-state-metrics\", resource=\"pods\"} != 1\n) > 0.95\n"
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: KubeNodeReadinessFlapping
|
||||
annotations:
|
||||
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.
|
||||
description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping
|
||||
summary: Node readiness status is flapping.
|
||||
expr: 'sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (cluster, node) > 2
|
||||
|
||||
and on (cluster, node)
|
||||
|
||||
kube_node_spec_unschedulable{job="kube-state-metrics"} == 0
|
||||
|
||||
'
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeletPlegDurationHigh
|
||||
annotations:
|
||||
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
|
||||
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh
|
||||
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
|
||||
expr: 'node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
|
||||
@ -501,7 +510,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletPodStartUpLatencyHigh
|
||||
annotations:
|
||||
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
|
||||
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh
|
||||
summary: Kubelet Pod startup latency is too high.
|
||||
expr: 'histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
|
||||
@ -512,7 +521,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletClientCertificateExpiration
|
||||
annotations:
|
||||
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
|
||||
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
|
||||
summary: Kubelet client certificate is about to expire.
|
||||
expr: 'kubelet_certificate_manager_client_ttl_seconds < 604800
|
||||
@ -522,7 +531,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletClientCertificateExpiration
|
||||
annotations:
|
||||
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
|
||||
description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
|
||||
summary: Kubelet client certificate is about to expire.
|
||||
expr: 'kubelet_certificate_manager_client_ttl_seconds < 86400
|
||||
@ -532,7 +541,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeletServerCertificateExpiration
|
||||
annotations:
|
||||
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
|
||||
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
|
||||
summary: Kubelet server certificate is about to expire.
|
||||
expr: 'kubelet_certificate_manager_server_ttl_seconds < 604800
|
||||
@ -542,7 +551,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletServerCertificateExpiration
|
||||
annotations:
|
||||
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.
|
||||
description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
|
||||
summary: Kubelet server certificate is about to expire.
|
||||
expr: 'kubelet_certificate_manager_server_ttl_seconds < 86400
|
||||
@ -552,7 +561,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeletClientCertificateRenewalErrors
|
||||
annotations:
|
||||
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).
|
||||
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors
|
||||
summary: Kubelet has failed to renew its client certificate.
|
||||
expr: 'increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0
|
||||
@ -563,7 +572,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletServerCertificateRenewalErrors
|
||||
annotations:
|
||||
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).
|
||||
description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors
|
||||
summary: Kubelet has failed to renew its server certificate.
|
||||
expr: 'increase(kubelet_server_expiration_renew_errors[5m]) > 0
|
||||
@ -640,20 +649,20 @@ spec:
|
||||
|
||||
'
|
||||
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h
|
||||
- expr: 'sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"} * 24 * 30)
|
||||
- expr: 'sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"})
|
||||
|
||||
'
|
||||
record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d
|
||||
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"\
|
||||
cluster\",le=\"30\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
|
||||
- expr: "1 - (\n (\n # write too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n ) +\n (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\
|
||||
LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d)\n"
|
||||
labels:
|
||||
verb: all
|
||||
record: apiserver_request:availability30d
|
||||
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"1\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=\"5\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=\"30\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
|
||||
- expr: "1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"LIST|GET\"})\n -\n (\n # too slow\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"})\n or\n vector(0)\n )\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"})\n +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"read\"})\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:availability30d
|
||||
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
|
||||
- expr: "1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"})\n )\n +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum by (cluster) (code:apiserver_request_total:increase30d{verb=\"write\"})\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:availability30d
|
||||
@ -687,66 +696,66 @@ spec:
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- name: kube-apiserver-burnrate.rules
|
||||
rules:
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate1d
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[1h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate1h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[2h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[2h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[2h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate2h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"\
|
||||
,code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[30m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[30m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[30m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"\
|
||||
apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate30m
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[3d]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[3d]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[3d]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate3d
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[5m]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[5m]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[5m]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate5m
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=\"1\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=\"5\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=\"30\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"\
|
||||
5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[6h]))\n or\n vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"namespace\",le=~\"5(\\\\.0)?\"}[6h]))\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"cluster\",le=~\"30(\\\\.0)?\"}[6h]))\n )\n )\n +\n # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\
|
||||
,verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n"
|
||||
labels:
|
||||
verb: read
|
||||
record: apiserver_request:burnrate6h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate1d
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate1h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate2h
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate30m
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate3d
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate5m
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=\"1\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
|
||||
- expr: "(\n (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\",le=~\"1(\\\\.0)?\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum by (cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n"
|
||||
labels:
|
||||
verb: write
|
||||
record: apiserver_request:burnrate6h
|
||||
@ -916,4 +925,122 @@ spec:
|
||||
labels:
|
||||
quantile: '0.5'
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- name: windows.node.rules
|
||||
rules:
|
||||
- expr: "count by (cluster) (\n windows_system_system_up_time{job=\"kubernetes-windows-exporter\"}\n)\n"
|
||||
record: node:windows_node:sum
|
||||
- expr: "count by (cluster, instance) (sum by (cluster, instance, core) (\n windows_cpu_time_total{job=\"kubernetes-windows-exporter\"}\n))\n"
|
||||
record: node:windows_node_num_cpu:sum
|
||||
- expr: '1 - avg by (cluster) (rate(windows_cpu_time_total{job="kubernetes-windows-exporter",mode="idle"}[1m]))
|
||||
|
||||
'
|
||||
record: :windows_node_cpu_utilisation:avg1m
|
||||
- expr: "1 - avg by (cluster, instance) (\n rate(windows_cpu_time_total{job=\"kubernetes-windows-exporter\",mode=\"idle\"}[1m])\n)\n"
|
||||
record: node:windows_node_cpu_utilisation:avg1m
|
||||
- expr: '1 -
|
||||
|
||||
sum by (cluster) (windows_memory_available_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
/
|
||||
|
||||
sum by (cluster) (windows_os_visible_memory_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: ':windows_node_memory_utilisation:'
|
||||
- expr: 'sum by (cluster) (windows_memory_available_bytes{job="kubernetes-windows-exporter"} + windows_memory_cache_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: :windows_node_memory_MemFreeCached_bytes:sum
|
||||
- expr: '(windows_memory_cache_bytes{job="kubernetes-windows-exporter"} + windows_memory_modified_page_list_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_core_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_normal_priority_bytes{job="kubernetes-windows-exporter"} + windows_memory_standby_cache_reserve_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: node:windows_node_memory_totalCached_bytes:sum
|
||||
- expr: 'sum by (cluster) (windows_os_visible_memory_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: :windows_node_memory_MemTotal_bytes:sum
|
||||
- expr: "sum by (cluster, instance) (\n (windows_memory_available_bytes{job=\"kubernetes-windows-exporter\"})\n)\n"
|
||||
record: node:windows_node_memory_bytes_available:sum
|
||||
- expr: "sum by (cluster, instance) (\n windows_os_visible_memory_bytes{job=\"kubernetes-windows-exporter\"}\n)\n"
|
||||
record: node:windows_node_memory_bytes_total:sum
|
||||
- expr: '(node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum)
|
||||
|
||||
/
|
||||
|
||||
scalar(sum(node:windows_node_memory_bytes_total:sum))
|
||||
|
||||
'
|
||||
record: node:windows_node_memory_utilisation:ratio
|
||||
- expr: '1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)
|
||||
|
||||
'
|
||||
record: 'node:windows_node_memory_utilisation:'
|
||||
- expr: 'irate(windows_memory_swap_page_operations_total{job="kubernetes-windows-exporter"}[5m])
|
||||
|
||||
'
|
||||
record: node:windows_node_memory_swap_io_pages:irate
|
||||
- expr: "avg by (cluster) (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m])\n )\n"
|
||||
record: :windows_node_disk_utilisation:avg_irate
|
||||
- expr: "avg by (cluster, instance) (\n (irate(windows_logical_disk_read_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_logical_disk_write_seconds_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
|
||||
record: node:windows_node_disk_utilisation:avg_irate
|
||||
- expr: "max by (cluster,instance,volume)(\n (windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n- windows_logical_disk_free_bytes{job=\"kubernetes-windows-exporter\"})\n/ windows_logical_disk_size_bytes{job=\"kubernetes-windows-exporter\"}\n)\n"
|
||||
record: 'node:windows_node_filesystem_usage:'
|
||||
- expr: 'max by (cluster, instance, volume) (windows_logical_disk_free_bytes{job="kubernetes-windows-exporter"} / windows_logical_disk_size_bytes{job="kubernetes-windows-exporter"})
|
||||
|
||||
'
|
||||
record: 'node:windows_node_filesystem_avail:'
|
||||
- expr: 'sum by (cluster) (irate(windows_net_bytes_total{job="kubernetes-windows-exporter"}[1m]))
|
||||
|
||||
'
|
||||
record: :windows_node_net_utilisation:sum_irate
|
||||
- expr: "sum by (cluster, instance) (\n (irate(windows_net_bytes_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
|
||||
record: node:windows_node_net_utilisation:sum_irate
|
||||
- expr: 'sum by (cluster) (irate(windows_net_packets_received_discarded_total{job="kubernetes-windows-exporter"}[1m])) +
|
||||
|
||||
sum by (cluster) (irate(windows_net_packets_outbound_discarded_total{job="kubernetes-windows-exporter"}[1m]))
|
||||
|
||||
'
|
||||
record: :windows_node_net_saturation:sum_irate
|
||||
- expr: "sum by (cluster, instance) (\n (irate(windows_net_packets_received_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]) +\n irate(windows_net_packets_outbound_discarded_total{job=\"kubernetes-windows-exporter\"}[1m]))\n)\n"
|
||||
record: node:windows_node_net_saturation:sum_irate
|
||||
- name: windows.pod.rules
|
||||
rules:
|
||||
- expr: 'windows_container_available{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_pod_container_available
|
||||
- expr: 'windows_container_cpu_usage_seconds_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_total_runtime
|
||||
- expr: 'windows_container_memory_usage_commit_bytes{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_memory_usage
|
||||
- expr: 'windows_container_memory_usage_private_working_set_bytes{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_private_working_set_usage
|
||||
- expr: 'windows_container_network_receive_bytes_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_network_received_bytes_total
|
||||
- expr: 'windows_container_network_transmit_bytes_total{job="kubernetes-windows-exporter", container_id != ""} * on(container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace, cluster)
|
||||
|
||||
'
|
||||
record: windows_container_network_transmitted_bytes_total
|
||||
- expr: "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n"
|
||||
record: kube_pod_windows_container_resource_memory_request
|
||||
- expr: 'kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on(container,pod,namespace,cluster) (windows_pod_container_available)
|
||||
|
||||
'
|
||||
record: kube_pod_windows_container_resource_memory_limit
|
||||
- expr: "max by (cluster, namespace, pod, container) (\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}\n) * on(container,pod,namespace,cluster) (windows_pod_container_available)\n"
|
||||
record: kube_pod_windows_container_resource_cpu_cores_request
|
||||
- expr: 'kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on(container,pod,namespace,cluster) (windows_pod_container_available)
|
||||
|
||||
'
|
||||
record: kube_pod_windows_container_resource_cpu_cores_limit
|
||||
- expr: "sum by (cluster, namespace, pod, container) (\n rate(windows_container_total_runtime{}[5m])\n)\n"
|
||||
record: namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate
|
||||
|
||||
|
@ -105,7 +105,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeHighNumberConntrackEntriesUsed
|
||||
annotations:
|
||||
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.'
|
||||
description: '{{`{{`}} $labels.instance {{`}}`}} {{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
|
||||
summary: Number of conntrack are getting close to the limit.
|
||||
expr: '(node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75
|
||||
@ -193,7 +193,7 @@ spec:
|
||||
'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
|
||||
summary: High CPU usage.
|
||||
expr: 'sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!="idle"}[2m]))) * 100 > 90
|
||||
expr: 'sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!~"idle|iowait"}[2m]))) * 100 > 90
|
||||
|
||||
'
|
||||
for: 15m
|
||||
|
@ -8,7 +8,6 @@ icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
- kubezero
|
||||
- rabbitmq
|
||||
- rabbitmq-cluster-operator
|
||||
- nats
|
||||
maintainers:
|
||||
- name: Stefan Reimer
|
||||
@ -25,8 +24,4 @@ dependencies:
|
||||
version: 14.6.6
|
||||
repository: https://charts.bitnami.com/bitnami
|
||||
condition: rabbitmq.enabled
|
||||
- name: rabbitmq-cluster-operator
|
||||
version: 4.3.19
|
||||
repository: https://charts.bitnami.com/bitnami
|
||||
condition: rabbitmq-cluster-operator.enabled
|
||||
kubeVersion: ">= 1.26.0"
|
||||
|
@ -20,7 +20,6 @@ Kubernetes: `>= 1.26.0`
|
||||
|------------|------|---------|
|
||||
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
|
||||
| https://charts.bitnami.com/bitnami | rabbitmq | 14.6.6 |
|
||||
| https://charts.bitnami.com/bitnami | rabbitmq-cluster-operator | 4.3.19 |
|
||||
| https://nats-io.github.io/k8s/helm/charts/ | nats | 1.2.2 |
|
||||
|
||||
## Values
|
||||
|
@ -1,4 +1,4 @@
|
||||
{{- if .Values.nats.promExporter.podMonitor.enabled }}
|
||||
{{- if .Values.nats.exporter.serviceMonitor.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
|
@ -22,25 +22,6 @@ nats:
|
||||
# url: "mq.example.com"
|
||||
gateway: istio-ingress/private-ingressgateway
|
||||
|
||||
# rabbitmq cluster operator
|
||||
rabbitmq-cluster-operator:
|
||||
enabled: false
|
||||
rabbitmqImage:
|
||||
tag: 3.11.4-debian-11-r0
|
||||
|
||||
useCertManager: true
|
||||
|
||||
clusterOperator:
|
||||
metrics:
|
||||
enabled: false
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
|
||||
msgTopologyOperator:
|
||||
metrics:
|
||||
enabled: false
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
|
||||
# rabbitmq
|
||||
rabbitmq:
|
||||
|
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubezero-operators
|
||||
description: Various operators supported by KubeZero
|
||||
type: application
|
||||
version: 0.1.8
|
||||
version: 0.2.0
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
@ -11,6 +11,7 @@ keywords:
|
||||
- opensearch
|
||||
- postgres
|
||||
- kafka
|
||||
- rabbitmq
|
||||
maintainers:
|
||||
- name: Stefan Reimer
|
||||
email: stefan@zero-downtime.net
|
||||
@ -23,15 +24,19 @@ dependencies:
|
||||
repository: https://opensearch-project.github.io/opensearch-k8s-operator/
|
||||
condition: opensearch-operator.enabled
|
||||
- name: eck-operator
|
||||
version: 2.15.0
|
||||
version: 2.16.1
|
||||
repository: https://helm.elastic.co
|
||||
condition: eck-operator.enabled
|
||||
- name: cloudnative-pg
|
||||
version: 0.22.1
|
||||
version: 0.23.0
|
||||
repository: https://cloudnative-pg.github.io/charts
|
||||
condition: cloudnative-pg.enabled
|
||||
- name: strimzi-kafka-operator
|
||||
version: 0.44.0
|
||||
version: 0.45.0
|
||||
repository: "oci://quay.io/strimzi-helm"
|
||||
condition: strimzi-kafka-operator.enabled
|
||||
kubeVersion: ">= 1.26.0"
|
||||
- name: rabbitmq-cluster-operator
|
||||
version: 4.4.3
|
||||
repository: https://charts.bitnami.com/bitnami
|
||||
condition: rabbitmq-cluster-operator.enabled
|
||||
kubeVersion: ">= 1.30.0-0"
|
||||
|
@ -1,6 +1,6 @@
|
||||
# kubezero-operators
|
||||
|
||||
![Version: 0.1.7](https://img.shields.io/badge/Version-0.1.7-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
![Version: 0.2.0](https://img.shields.io/badge/Version-0.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
|
||||
|
||||
Various operators supported by KubeZero
|
||||
|
||||
@ -14,15 +14,16 @@ Various operators supported by KubeZero
|
||||
|
||||
## Requirements
|
||||
|
||||
Kubernetes: `>= 1.26.0`
|
||||
Kubernetes: `>= 1.30.0-0`
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
|
||||
| https://cloudnative-pg.github.io/charts | cloudnative-pg | 0.22.1 |
|
||||
| https://helm.elastic.co | eck-operator | 2.15.0 |
|
||||
| https://charts.bitnami.com/bitnami | rabbitmq-cluster-operator | 4.4.3 |
|
||||
| https://cloudnative-pg.github.io/charts | cloudnative-pg | 0.23.0 |
|
||||
| https://helm.elastic.co | eck-operator | 2.16.1 |
|
||||
| https://opensearch-project.github.io/opensearch-k8s-operator/ | opensearch-operator | 2.7.0 |
|
||||
| oci://quay.io/strimzi-helm | strimzi-kafka-operator | 0.44.0 |
|
||||
| oci://quay.io/strimzi-helm | strimzi-kafka-operator | 0.45.0 |
|
||||
|
||||
## Values
|
||||
|
||||
@ -31,32 +32,23 @@ Kubernetes: `>= 1.26.0`
|
||||
| cloudnative-pg.enabled | bool | `false` | |
|
||||
| cloudnative-pg.monitoring.grafanaDashboard.create | bool | `false` | |
|
||||
| cloudnative-pg.monitoring.podMonitorEnabled | bool | `false` | |
|
||||
| cloudnative-pg.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
|
||||
| cloudnative-pg.tolerations[0].effect | string | `"NoSchedule"` | |
|
||||
| cloudnative-pg.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
|
||||
| eck-operator.enabled | bool | `false` | |
|
||||
| eck-operator.installCRDs | bool | `false` | |
|
||||
| eck-operator.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
|
||||
| eck-operator.tolerations[0].effect | string | `"NoSchedule"` | |
|
||||
| eck-operator.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
|
||||
| opensearch-operator.enabled | bool | `false` | |
|
||||
| opensearch-operator.fullnameOverride | string | `"opensearch-operator"` | |
|
||||
| opensearch-operator.kubeRbacProxy.enable | bool | `false` | |
|
||||
| opensearch-operator.manager.extraEnv[0].name | string | `"SKIP_INIT_CONTAINER"` | |
|
||||
| opensearch-operator.manager.extraEnv[0].value | string | `"true"` | |
|
||||
| opensearch-operator.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
|
||||
| opensearch-operator.tolerations[0].effect | string | `"NoSchedule"` | |
|
||||
| opensearch-operator.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
|
||||
| rabbitmq-cluster-operator.clusterOperator.metrics.enabled | bool | `false` | |
|
||||
| rabbitmq-cluster-operator.clusterOperator.metrics.serviceMonitor.enabled | bool | `true` | |
|
||||
| rabbitmq-cluster-operator.enabled | bool | `false` | |
|
||||
| rabbitmq-cluster-operator.msgTopologyOperator.metrics.enabled | bool | `false` | |
|
||||
| rabbitmq-cluster-operator.msgTopologyOperator.metrics.serviceMonitor.enabled | bool | `true` | |
|
||||
| rabbitmq-cluster-operator.useCertManager | bool | `true` | |
|
||||
| strimzi-kafka-operator.enabled | bool | `false` | |
|
||||
| strimzi-kafka-operator.leaderElection.enable | bool | `false` | |
|
||||
| strimzi-kafka-operator.monitoring.podMonitorEnabled | bool | `false` | |
|
||||
| strimzi-kafka-operator.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
|
||||
| strimzi-kafka-operator.resources.limits.memory | string | `"384Mi"` | |
|
||||
| strimzi-kafka-operator.resources.requests.cpu | string | `"20m"` | |
|
||||
| strimzi-kafka-operator.resources.requests.memory | string | `"256Mi"` | |
|
||||
| strimzi-kafka-operator.revisionHistoryLimit | int | `3` | |
|
||||
| strimzi-kafka-operator.tolerations[0].effect | string | `"NoSchedule"` | |
|
||||
| strimzi-kafka-operator.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
|
||||
| strimzi-kafka-operator.revisionHistoryLimit | int | `2` | |
|
||||
| strimzi-kafka-operator.watchAnyNamespace | bool | `true` | |
|
||||
|
||||
----------------------------------------------
|
||||
|
@ -1,5 +1,5 @@
|
||||
apiVersion: v2
|
||||
appVersion: 2.15.0
|
||||
appVersion: 2.16.1
|
||||
description: Elastic Cloud on Kubernetes (ECK) operator
|
||||
home: https://github.com/elastic/cloud-on-k8s
|
||||
icon: https://helm.elastic.co/icons/eck.png
|
||||
@ -18,4 +18,4 @@ maintainers:
|
||||
name: Elastic
|
||||
name: eck-operator
|
||||
type: application
|
||||
version: 2.15.0
|
||||
version: 2.16.1
|
||||
|
@ -4,14 +4,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: agents.agent.k8s.elastic.co
|
||||
spec:
|
||||
group: agent.k8s.elastic.co
|
||||
@ -1137,14 +1137,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: apmservers.apm.k8s.elastic.co
|
||||
spec:
|
||||
group: apm.k8s.elastic.co
|
||||
@ -2372,14 +2372,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: beats.beat.k8s.elastic.co
|
||||
spec:
|
||||
group: beat.k8s.elastic.co
|
||||
@ -2854,14 +2854,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: elasticmapsservers.maps.k8s.elastic.co
|
||||
spec:
|
||||
group: maps.k8s.elastic.co
|
||||
@ -3459,14 +3459,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: elasticsearchautoscalers.autoscaling.k8s.elastic.co
|
||||
spec:
|
||||
group: autoscaling.k8s.elastic.co
|
||||
@ -3818,14 +3818,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: elasticsearches.elasticsearch.k8s.elastic.co
|
||||
spec:
|
||||
group: elasticsearch.k8s.elastic.co
|
||||
@ -4843,6 +4843,14 @@ spec:
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
remoteClusterServer:
|
||||
description: |-
|
||||
RemoteClusterServer specifies if the remote cluster server should be enabled.
|
||||
This must be enabled if this cluster is a remote cluster which is expected to be accessed using API key authentication.
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
type: object
|
||||
remoteClusters:
|
||||
description: RemoteClusters enables you to establish uni-directional
|
||||
connections to a remote Elasticsearch cluster.
|
||||
@ -4850,6 +4858,55 @@ spec:
|
||||
description: RemoteCluster declares a remote Elasticsearch cluster
|
||||
connection.
|
||||
properties:
|
||||
apiKey:
|
||||
description: 'APIKey can be used to enable remote cluster access
|
||||
using Cross-Cluster API keys: https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-cross-cluster-api-key.html'
|
||||
properties:
|
||||
access:
|
||||
description: Access is the name of the API Key. It is automatically
|
||||
generated if not set or empty.
|
||||
properties:
|
||||
replication:
|
||||
properties:
|
||||
names:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- names
|
||||
type: object
|
||||
search:
|
||||
properties:
|
||||
allow_restricted_indices:
|
||||
type: boolean
|
||||
field_security:
|
||||
properties:
|
||||
except:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
grant:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- except
|
||||
- grant
|
||||
type: object
|
||||
names:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
query:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
required:
|
||||
- names
|
||||
type: object
|
||||
type: object
|
||||
required:
|
||||
- access
|
||||
type: object
|
||||
elasticsearchRef:
|
||||
description: ElasticsearchRef is a reference to an Elasticsearch
|
||||
cluster running within the same k8s cluster.
|
||||
@ -6562,14 +6619,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: enterprisesearches.enterprisesearch.k8s.elastic.co
|
||||
spec:
|
||||
group: enterprisesearch.k8s.elastic.co
|
||||
@ -7731,14 +7788,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: kibanas.kibana.k8s.elastic.co
|
||||
spec:
|
||||
group: kibana.k8s.elastic.co
|
||||
@ -9046,14 +9103,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: logstashes.logstash.k8s.elastic.co
|
||||
spec:
|
||||
group: logstash.k8s.elastic.co
|
||||
@ -10293,14 +10350,14 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.16.4
|
||||
controller-gen.kubebuilder.io/version: v0.16.5
|
||||
helm.sh/resource-policy: keep
|
||||
labels:
|
||||
app.kubernetes.io/instance: 'logging'
|
||||
app.kubernetes.io/managed-by: 'Helm'
|
||||
app.kubernetes.io/name: 'eck-operator-crds'
|
||||
app.kubernetes.io/version: '2.15.0'
|
||||
helm.sh/chart: 'eck-operator-crds-2.15.0'
|
||||
app.kubernetes.io/version: '2.16.1'
|
||||
helm.sh/chart: 'eck-operator-crds-2.16.1'
|
||||
name: stackconfigpolicies.stackconfigpolicy.k8s.elastic.co
|
||||
spec:
|
||||
group: stackconfigpolicy.k8s.elastic.co
|
||||
|
@ -103,7 +103,7 @@ kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "eck-operator.labels" . | nindent 4 }}
|
||||
name: "{{ include "eck-operator.fullname" . }}-proxy-role"
|
||||
name: "{{ include "eck-operator.fullname" . }}-metrics-auth-role"
|
||||
rules:
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
|
@ -13,11 +13,8 @@ data:
|
||||
{{- if and .Values.config.metrics.secureMode.enabled (eq $metricsPort 0) }}
|
||||
{{- fail "config.metrics.port must be greater than 0 when config.metrics.secureMode.enabled is true" }}
|
||||
{{- end }}
|
||||
{{- if .Values.config.metrics.secureMode.enabled }}
|
||||
metrics-port: {{ add $metricsPort 1 }}
|
||||
{{- else }}
|
||||
metrics-port: {{ $metricsPort }}
|
||||
{{- end }}
|
||||
metrics-secure: {{ .Values.config.metrics.secureMode.enabled }}
|
||||
container-registry: {{ .Values.config.containerRegistry }}
|
||||
{{- with .Values.config.containerSuffix }}
|
||||
container-suffix: {{ . }}
|
||||
|
@ -85,11 +85,11 @@ kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "eck-operator.labels" $ | nindent 4 }}
|
||||
name: "{{ include "eck-operator.fullname" . }}-proxy-rolebinding"
|
||||
name: "{{ include "eck-operator.fullname" . }}-metrics-auth-rolebinding"
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: "{{ include "eck-operator.fullname" . }}-proxy-role"
|
||||
name: "{{ include "eck-operator.fullname" . }}-metrics-auth-role"
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: {{ $svcAccount }}
|
||||
|
@ -1,4 +1,4 @@
|
||||
{{- if .Values.config.metrics.secureMode.enabled }}
|
||||
{{- if and .Values.config.metrics.secureMode.enabled .Values.serviceMonitor.enabled }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
@ -19,10 +19,13 @@ spec:
|
||||
scheme: https
|
||||
interval: 30s
|
||||
tlsConfig:
|
||||
insecureSkipVerify: {{ .Values.config.metrics.secureMode.tls.insecureSkipVerify | default false }}
|
||||
{{- if (not .Values.config.metrics.secureMode.tls.insecureSkipVerify) }}
|
||||
{{- $leading_path := trimSuffix "/" .Values.config.metrics.secureMode.tls.caMountDirectory }}
|
||||
{{- with .Values.config.metrics.secureMode.tls.caSecret }}
|
||||
{{- $insecureSkipVerify := (ternary .Values.config.metrics.secureMode.tls.insecureSkipVerify .Values.serviceMonitor.insecureSkipVerify (hasKey .Values.config.metrics.secureMode.tls "insecureSkipVerify")) }}
|
||||
insecureSkipVerify: {{ $insecureSkipVerify }}
|
||||
{{- if (not $insecureSkipVerify) }}
|
||||
{{- $caMountDirectory := or (.Values.config.metrics.secureMode.tls.caMountDirectory) (.Values.serviceMonitor.caMountDirectory) -}}
|
||||
{{- $leading_path := trimSuffix "/" $caMountDirectory }}
|
||||
{{- $caSecret := or (.Values.config.metrics.secureMode.tls.caSecret) (.Values.serviceMonitor.caSecret) -}}
|
||||
{{- with $caSecret }}
|
||||
caFile: "{{ $leading_path }}/{{ . }}/ca.crt"
|
||||
{{- end }}
|
||||
serverName: "{{ include "eck-operator.fullname" . }}-metrics.{{ .Release.Namespace }}.svc"
|
@ -51,7 +51,7 @@ spec:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- image: "{{ .Values.image.repository }}{{- if .Values.config.ubiOnly -}}-ubi{{- end -}}:{{ default .Chart.AppVersion .Values.image.tag }}"
|
||||
- image: "{{ .Values.image.repository }}{{- if .Values.config.ubiOnly -}}-ubi{{- end -}}{{- if .Values.image.fips -}}-fips{{- end -}}:{{ default .Chart.AppVersion .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
name: manager
|
||||
args:
|
||||
@ -89,7 +89,7 @@ spec:
|
||||
{{- end }}
|
||||
{{- if or .Values.webhook.enabled (gt $metricsPort 0) }}
|
||||
ports:
|
||||
{{- if and (gt $metricsPort 0) (not .Values.config.metrics.secureMode.enabled) }}
|
||||
{{- if (gt $metricsPort 0) }}
|
||||
- containerPort: {{ $metricsPort }}
|
||||
name: metrics
|
||||
protocol: TCP
|
||||
@ -109,48 +109,13 @@ spec:
|
||||
name: cert
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- with .Values.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if .Values.config.metrics.secureMode.enabled }}
|
||||
- name: kube-rbac-proxy
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- "ALL"
|
||||
image: gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0
|
||||
args:
|
||||
- "--secure-listen-address=0.0.0.0:{{ $metricsPort }}"
|
||||
- "--upstream=http://127.0.0.1:{{ add $metricsPort 1 }}/"
|
||||
- "--logtostderr=true"
|
||||
- "--v=0"
|
||||
{{- if .Values.config.metrics.secureMode.tls.certificateSecret }}
|
||||
- "--tls-cert-file=/tls/tls.crt"
|
||||
- "--tls-private-key-file=/tls/tls.key"
|
||||
{{- end }}
|
||||
{{- if or .Values.config.metrics.secureMode.tls.certificateSecret .Values.config.metrics.secureMode.volumeMounts }}
|
||||
volumeMounts:
|
||||
{{- with .Values.config.metrics.secureMode.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if .Values.config.metrics.secureMode.tls.certificateSecret }}
|
||||
- mountPath: "/tls"
|
||||
- mountPath: "/tmp/k8s-metrics-server/serving-certs"
|
||||
name: tls-certificate
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- containerPort: {{ $metricsPort }}
|
||||
protocol: TCP
|
||||
name: metrics
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 128Mi
|
||||
requests:
|
||||
cpu: 5m
|
||||
memory: 64Mi
|
||||
{{- with .Values.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: conf
|
||||
|
@ -24,6 +24,10 @@ image:
|
||||
pullPolicy: IfNotPresent
|
||||
# tag is the container image tag. If not defined, defaults to chart appVersion.
|
||||
tag: null
|
||||
# fips specifies whether the operator will use a FIPS compliant container image for its own StatefulSet image.
|
||||
# This setting does not apply to Elastic Stack applications images.
|
||||
# Can be combined with config.ubiOnly.
|
||||
fips: false
|
||||
|
||||
# priorityClassName defines the PriorityClass to be used by the operator pods.
|
||||
priorityClassName: ""
|
||||
@ -178,29 +182,12 @@ config:
|
||||
port: "0"
|
||||
# secureMode contains the options for enabling and configuring RBAC and TLS/HTTPs for the metrics endpoint.
|
||||
secureMode:
|
||||
# secureMode.enabled specifies whether to enable RBAC and TLS/HTTPs for the metrics endpoint. (Will be enabled by default in v2.14.0)
|
||||
# * This option requires using a ServiceMonitor to scrape the metrics and as such is mutually exclusive with the podMonitor.enabled option.
|
||||
# secureMode.enabled specifies whether to enable RBAC and TLS/HTTPs for the metrics endpoint.
|
||||
# * This option makes most sense when using a ServiceMonitor to scrape the metrics and is therefore mutually exclusive with the podMonitor.enabled option.
|
||||
# * This option also requires using cluster scoped resources (ClusterRole, ClusterRoleBinding) to
|
||||
# grant access to the /metrics endpoint. (createClusterScopedResources: true is required)
|
||||
#
|
||||
# This option requires the following settings within Prometheus to function:
|
||||
# 1. RBAC settings for the Prometheus instance to access the metrics endpoint.
|
||||
#
|
||||
# - nonResourceURLs:
|
||||
# - /metrics
|
||||
# verbs:
|
||||
# - get
|
||||
#
|
||||
# 2. If using the Prometheus Operator and your Prometheus instance is not in the same namespace as the operator you will need
|
||||
# the Prometheus Operator configured with the following Helm values:
|
||||
#
|
||||
# prometheus:
|
||||
# prometheusSpec:
|
||||
# serviceMonitorNamespaceSelector: {}
|
||||
# serviceMonitorSelectorNilUsesHelmValues: false
|
||||
enabled: false
|
||||
# additional volume mounts for the kube-rbac-proxy container.
|
||||
volumeMounts: []
|
||||
tls:
|
||||
# certificateSecret is the name of the tls secret containing the custom TLS certificate and key for the secure metrics endpoint.
|
||||
#
|
||||
@ -212,27 +199,6 @@ config:
|
||||
# example: kubectl create secret tls eck-metrics-tls-certificate -n elastic-system \
|
||||
# --cert=/path/to/tls.crt --key=/path/to/tls.key
|
||||
certificateSecret: ""
|
||||
# caSecret is the name of the secret containing the custom CA certificate used to generate the custom TLS certificate for the secure metrics endpoint.
|
||||
#
|
||||
# * This *must* be the name of the secret containing the CA certificate used to sign the custom TLS certificate.
|
||||
# * This secret *must* be in the same namespace as the Prometheus instance that will scrape the metrics.
|
||||
# * If using the Prometheus operator this secret must be within the `spec.secrets` field of the `Prometheus` custom resource such that it is mounted into the Prometheus pod at `caMountDirectory`, which defaults to /etc/prometheus/secrets/{secret-name}.
|
||||
# * This is an optional setting and is only required if you are using a custom TLS certificate.
|
||||
# * Key must be named ca.crt.
|
||||
#
|
||||
# example: kubectl create secret generic eck-metrics-tls-ca -n monitoring \
|
||||
# --from-file=ca.crt=/path/to/ca.pem
|
||||
caSecret: ""
|
||||
# caMountDirectory is the directory at which the CA certificate is mounted within the Prometheus pod.
|
||||
#
|
||||
# * You should only need to adjust this if you are *not* using the Prometheus operator.
|
||||
caMountDirectory: "/etc/prometheus/secrets/"
|
||||
# insecureSkipVerify specifies whether to skip verification of the TLS certificate for the secure metrics endpoint.
|
||||
#
|
||||
# * If this setting is set to false, then the following settings are required:
|
||||
# - certificateSecret
|
||||
# - caSecret
|
||||
insecureSkipVerify: true
|
||||
|
||||
# containerRegistry to use for pulling Elasticsearch and other application container images.
|
||||
containerRegistry: docker.elastic.co
|
||||
@ -333,11 +299,49 @@ podMonitor:
|
||||
# Prometheus ServiceMonitor configuration
|
||||
# Only used when config.metrics.secureMode.enabled is true
|
||||
# Reference: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#servicemonitor
|
||||
serviceMonitor: {}
|
||||
|
||||
serviceMonitor:
|
||||
# This option requires the following settings within Prometheus to function:
|
||||
# 1. RBAC settings for the Prometheus instance to access the metrics endpoint.
|
||||
#
|
||||
# - nonResourceURLs:
|
||||
# - /metrics
|
||||
# verbs:
|
||||
# - get
|
||||
#
|
||||
# 2. If using the Prometheus Operator and your Prometheus instance is not in the same namespace as the operator you will need
|
||||
# the Prometheus Operator configured with the following Helm values:
|
||||
#
|
||||
# prometheus:
|
||||
# prometheusSpec:
|
||||
# serviceMonitorNamespaceSelector: {}
|
||||
# serviceMonitorSelectorNilUsesHelmValues: false
|
||||
#
|
||||
# allows disabling the serviceMonitor; enabled by default for backwards compatibility
|
||||
enabled: true
|
||||
# namespace determines in which namespace the serviceMonitor will be deployed.
|
||||
# If not set, the serviceMonitor will be created in the namespace in which the Helm release is installed
|
||||
# namespace: monitoring
|
||||
# caSecret is the name of the secret containing the custom CA certificate used to generate the custom TLS certificate for the secure metrics endpoint.
|
||||
#
|
||||
# * This *must* be the name of the secret containing the CA certificate used to sign the custom TLS certificate for the metrics endpoint.
|
||||
# * This secret *must* be in the same namespace as the Prometheus instance that will scrape the metrics.
|
||||
# * If using the Prometheus operator this secret must be within the `spec.secrets` field of the `Prometheus` custom resource such that it is mounted into the Prometheus pod at `caMountDirectory`, which defaults to /etc/prometheus/secrets/{secret-name}.
|
||||
# * This is an optional setting and is only required if you are using a custom TLS certificate.
|
||||
# * Key must be named ca.crt.
|
||||
#
|
||||
# example: kubectl create secret generic eck-metrics-tls-ca -n monitoring \
|
||||
# --from-file=ca.crt=/path/to/ca.pem
|
||||
caSecret: ""
|
||||
# caMountDirectory is the directory at which the CA certificate is mounted within the Prometheus pod.
|
||||
#
|
||||
# * You should only need to adjust this if you are *not* using the Prometheus operator.
|
||||
caMountDirectory: "/etc/prometheus/secrets/"
|
||||
# insecureSkipVerify specifies whether to skip verification of the TLS certificate for the secure metrics endpoint.
|
||||
#
|
||||
# * If this setting is set to false, then the following settings are required:
|
||||
# - certificateSecret
|
||||
# - caSecret
|
||||
insecureSkipVerify: true
|
||||
|
||||
# Globals meant for internal use only
|
||||
global:
|
||||
|
@ -0,0 +1,16 @@
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: ClusterImageCatalog
|
||||
metadata:
|
||||
name: postgresql
|
||||
spec:
|
||||
images:
|
||||
- major: 13
|
||||
image: ghcr.io/cloudnative-pg/postgresql:13.18-34-bookworm@sha256:79ffc8faf88cbaf768791a23f15031cc400778321378237ead6cb77e8cfcf192
|
||||
- major: 14
|
||||
image: ghcr.io/cloudnative-pg/postgresql:14.15-34-bookworm@sha256:95b3f1a94c3d6755128a57e335d35ff196597078e09b93438009b8a9dcb2d409
|
||||
- major: 15
|
||||
image: ghcr.io/cloudnative-pg/postgresql:15.10-34-bookworm@sha256:4e8945ec4d6b744aa612f80c6b80cc525eafb411e44527c81f249fda35363765
|
||||
- major: 16
|
||||
image: ghcr.io/cloudnative-pg/postgresql:16.6-33-bookworm@sha256:7dfda49485274b61ada9bb347caffac01dee442ffd119eb19317a2692347657b
|
||||
- major: 17
|
||||
image: ghcr.io/cloudnative-pg/postgresql:17.2-33-bookworm@sha256:52b78e8e4a297e268be168c7e107a2117072dc38f4a11d9d056ff0cc13d4007f
|
File diff suppressed because one or more lines are too long
@ -16,6 +16,9 @@ helm template charts/eck-operator/charts/eck-operator-crds --name-template loggi
|
||||
rm -rf charts/eck-operator/charts
|
||||
yq eval -Mi 'del(.dependencies)' charts/eck-operator/Chart.yaml
|
||||
|
||||
# get latest cloudnative-pg clusterimagecatalog
|
||||
wget -qO templates/cloudnative-pg/ClusterImageCatalog-bookworm.yaml https://raw.githubusercontent.com/cloudnative-pg/postgres-containers/refs/heads/main/Debian/ClusterImageCatalog-bookworm.yaml
|
||||
|
||||
# Create ZDT dashboard configmap
|
||||
../kubezero-metrics/sync_grafana_dashboards.py dashboards-pg.yaml templates/cloudnative-pg/grafana-dashboards.yaml
|
||||
../kubezero-metrics/sync_grafana_dashboards.py dashboards-strimzi.yaml templates/strimzi/grafana-dashboards.yaml
|
||||
|
@ -48,3 +48,24 @@ strimzi-kafka-operator:
|
||||
|
||||
monitoring:
|
||||
podMonitorEnabled: false
|
||||
|
||||
# rabbitmq cluster operator
|
||||
rabbitmq-cluster-operator:
|
||||
enabled: false
|
||||
#rabbitmqImage:
|
||||
# tag: 3.11.4-debian-11-r0
|
||||
|
||||
useCertManager: true
|
||||
|
||||
clusterOperator:
|
||||
metrics:
|
||||
enabled: false
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
|
||||
msgTopologyOperator:
|
||||
enabled: false
|
||||
metrics:
|
||||
enabled: false
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
|
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: kubezero-storage
|
||||
description: KubeZero umbrella chart for all things storage incl. AWS EBS/EFS, openEBS-lvm, gemini
|
||||
type: application
|
||||
version: 0.8.9
|
||||
version: 0.8.10
|
||||
home: https://kubezero.com
|
||||
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
|
||||
keywords:
|
||||
@ -24,11 +24,11 @@ dependencies:
|
||||
condition: lvm-localpv.enabled
|
||||
repository: https://openebs.github.io/lvm-localpv
|
||||
- name: aws-ebs-csi-driver
|
||||
version: 2.36.0
|
||||
version: 2.39.3
|
||||
condition: aws-ebs-csi-driver.enabled
|
||||
repository: https://kubernetes-sigs.github.io/aws-ebs-csi-driver
|
||||
- name: aws-efs-csi-driver
|
||||
version: 3.0.8
|
||||
version: 3.1.6
|
||||
condition: aws-efs-csi-driver.enabled
|
||||
repository: https://kubernetes-sigs.github.io/aws-efs-csi-driver
|
||||
- name: gemini
|
||||
@ -36,7 +36,7 @@ dependencies:
|
||||
condition: gemini.enabled
|
||||
repository: https://charts.fairwinds.com/stable
|
||||
- name: k8up
|
||||
version: 4.8.1
|
||||
version: 4.8.3
|
||||
condition: k8up.enabled
|
||||
repository: https://k8up-io.github.io/k8up
|
||||
kubeVersion: ">= 1.26.0"
|
||||
|
@ -1,4 +1,77 @@
|
||||
# Helm chart
|
||||
|
||||
## v2.39.3
|
||||
|
||||
### Urgent Upgrade Notes
|
||||
|
||||
Please upgrade from v2.39.2 directly to v2.39.3 to avoid upgrade failures if you are using this chart as a subchart.
|
||||
|
||||
### Bug or Regression
|
||||
- Fix sub-charting by removing values schema ([#2322](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2322), [@ElijahQuinones](https://github.com/ElijahQuinones))
|
||||
|
||||
## v2.39.2
|
||||
|
||||
### Urgent Upgrade Notes
|
||||
|
||||
Please upgrade from v2.38.1 directly to v2.39.2 to avoid upgrade failures if you are relying on `a1CompatibilityDaemonSet`.
|
||||
|
||||
### Bug or Regression
|
||||
- Fix helm regression when `a1CompatibilityDaemonSet=true` ([#2316](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2316), [@AndrewSirenko](https://github.com/AndrewSirenko))
|
||||
|
||||
## v2.39.1
|
||||
|
||||
### Bug or Regression
|
||||
- Fix `node.selinux` to properly set SELinux-specific mounts as ReadOnly ([#2311](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2311), [@AndrewSirenko](https://github.com/AndrewSirenko))
|
||||
|
||||
## v2.39.0
|
||||
|
||||
### Feature
|
||||
|
||||
- Add Helm parameter `node.selinux` to enable SELinux-specific mounts on the node DaemonSet ([#2253](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2253), [@ConnorJC3](https://github.com/ConnorJC3))
|
||||
- Add Helm FIPS parameter ([#2244](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2244), [@ConnorJC3](https://github.com/ConnorJC3))
|
||||
|
||||
## v2.38.1
|
||||
|
||||
### Feature
|
||||
|
||||
- Render templated controller service account parameters ([#2243](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2243), [@ElijahQuinones](https://github.com/ElijahQuinones))
|
||||
|
||||
### Bug or Regression
|
||||
|
||||
- Fix rendering failure when `node.enableMetrics` is set to `true` ([#2250](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2250), [@mindw](https://github.com/mindw))
|
||||
- Remove duplicate 'enableMetrics' key ([#2256](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2256), [@sule26](https://github.com/sule26))
|
||||
|
||||
## v2.37.0
|
||||
* Bump driver version to `v1.37.0`
|
||||
* Add init containers to node daemonset ([#2215](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2215), [@clbx](https://github.com/clbx))
|
||||
* Fix fetching test package version for kubetest in helm-tester ([#2203](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2203), [@torredil](https://github.com/torredil))
|
||||
|
||||
## v2.36.0
|
||||
* Bump driver version to `v1.36.0`
|
||||
* Add recommended autoscaler Tolerations to driver DaemonSet ([#2165](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2165), [@AndrewSirenko](https://github.com/AndrewSirenko))
|
||||
* Add support for unhealthyPodEvictionPolicy on PodDisruptionBudget ([#2159](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2159), [@peterabarr](https://github.com/peterabarr))
|
||||
|
||||
## v2.35.1
|
||||
* Fix an issue causing the `csi-attacher` container to get stuck in `CrashLoopBackoff` on clusters with VAC enabled. Users with a VAC-enabled cluster are strongly encouraged to skip `v2.35.0` and/or upgrade directly to `v2.35.1` or later.
|
||||
|
||||
## v2.35.0
|
||||
* Bump driver version to `v1.35.0`
|
||||
* Add reservedVolumeAttachments to windows nodes ([#2134](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2134),[@AndrewSirenko](https://github.com/AndrewSirenko))
|
||||
* Add legacy-xfs driver option for clusters that mount XFS volumes to nodes with Linux kernel <= 5.4. Warning: This is a temporary workaround for customers unable to immediately upgrade their nodes. It will be removed in a future release. See [the options documentation](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/release-1.35/docs/options.md) for more details.([#2121](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2121),[@AndrewSirenko](https://github.com/AndrewSirenko))
|
||||
* Add back "Auto-enable VAC on clusters with beta API version" ([#2141](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2141), [@ConnorJC3](https://github.com/ConnorJC3))
|
||||
|
||||
## v2.34.0
|
||||
* Bump driver version to `v1.34.0`
|
||||
* Add toggle for PodDisruptionBudget in chart ([#2109](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2109), [@AndrewSirenko](https://github.com/AndrewSirenko))
|
||||
* Add nodeComponentOnly parameter to helm chart ([#2106](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2106), [@AndrewSirenko](https://github.com/AndrewSirenko))
|
||||
* fix: sidecars.snapshotter.logLevel not being respected ([#2102](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2102), [@zyue110026](https://github.com/zyue110026))
|
||||
|
||||
## v2.33.0
|
||||
* Bump driver version to `v1.33.0`
|
||||
* Bump CSI sidecar container versions
|
||||
* Add fix for enableLinux node parameter ([#2078](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2078), [@ElijahQuinones](https://github.com/ElijahQuinones))
|
||||
* Fix dnsConfig indentation in controller template file ([#2084](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/pull/2084), [@cHiv0rz](https://github.com/cHiv0rz))
|
||||
|
||||
## v2.32.0
|
||||
* Bump driver version to `v1.32.0`
|
||||
* Bump CSI sidecar container versions
|
||||
|
@ -1,5 +1,5 @@
|
||||
apiVersion: v2
|
||||
appVersion: 1.32.0
|
||||
appVersion: 1.39.0
|
||||
description: A Helm chart for AWS EBS CSI Driver
|
||||
home: https://github.com/kubernetes-sigs/aws-ebs-csi-driver
|
||||
keywords:
|
||||
@ -13,4 +13,4 @@ maintainers:
|
||||
name: aws-ebs-csi-driver
|
||||
sources:
|
||||
- https://github.com/kubernetes-sigs/aws-ebs-csi-driver
|
||||
version: 2.32.0
|
||||
version: 2.39.3
|
||||
|
@ -2,4 +2,6 @@ To verify that aws-ebs-csi-driver has started, run:
|
||||
|
||||
kubectl get pod -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "aws-ebs-csi-driver.name" . }},app.kubernetes.io/instance={{ .Release.Name }}"
|
||||
|
||||
NOTE: The [CSI Snapshotter](https://github.com/kubernetes-csi/external-snapshotter) controller and CRDs will no longer be installed as part of this chart and moving forward will be a prerequisite of using the snapshotting functionality.
|
||||
[ACTION REQUIRED] Update to the EBS CSI Driver IAM Policy
|
||||
|
||||
Due to an upcoming change in handling of IAM policies for the CreateVolume API when creating a volume from an EBS snapshot, a change to your EBS CSI Driver policy may be needed. For more information and remediation steps, see GitHub issue #2190 (https://github.com/kubernetes-sigs/aws-ebs-csi-driver/issues/2190). This change affects all versions of the EBS CSI Driver and action may be required even on clusters where the driver is not upgraded.
|
||||
|
@ -31,6 +31,13 @@ Create chart name and version as used by the chart label.
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Determine image
|
||||
*/}}
|
||||
{{- define "aws-ebs-csi-driver.fullImagePath" -}}
|
||||
{{ printf "%s%s:%s%s" (default "" .Values.image.containerRegistry) .Values.image.repository (default (printf "v%s" .Chart.AppVersion) (.Values.image.tag | toString)) (.Values.fips | ternary "-fips" "") }}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
@ -83,3 +90,21 @@ Handle http proxy env vars
|
||||
- name: NO_PROXY
|
||||
value: {{ .Values.proxy.no_proxy | quote }}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Recommended daemonset tolerations
|
||||
*/}}
|
||||
{{- define "aws-ebs-csi-driver.daemonset-tolerations" -}}
|
||||
# Prevents stateful workloads from being scheduled to node before CSI Driver reports volume attachment limit
|
||||
- key: "ebs.csi.aws.com/agent-not-ready"
|
||||
operator: "Exists"
|
||||
# Prevents undesired eviction by Cluster Autoscaler
|
||||
- key: "ToBeDeletedByClusterAutoscaler"
|
||||
operator: Exists
|
||||
# Prevents undesired eviction by v1 Karpenter
|
||||
- key: "karpenter.sh/disrupted"
|
||||
operator: Exists
|
||||
# Prevents undesired eviction by v1beta1 Karpenter
|
||||
- key: "karpenter.sh/disruption"
|
||||
operator: Exists
|
||||
{{- end -}}
|
||||
|
@ -49,6 +49,7 @@ spec:
|
||||
{{- with .Values.node.tolerations }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- include "aws-ebs-csi-driver.daemonset-tolerations" . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.node.windowsHostProcess }}
|
||||
securityContext:
|
||||
@ -56,10 +57,14 @@ spec:
|
||||
hostProcess: true
|
||||
runAsUserName: "NT AUTHORITY\\SYSTEM"
|
||||
hostNetwork: true
|
||||
{{- with .Values.node.initContainers }}
|
||||
initContainers:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: ebs-plugin
|
||||
image: {{ printf "%s%s:%s" (default "" .Values.image.containerRegistry) .Values.image.repository (default (printf "v%s" .Chart.AppVersion) (toString .Values.image.tag)) }}
|
||||
image: {{ include "aws-ebs-csi-driver.fullImagePath" $ }}
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
{{- if .Values.node.windowsHostProcess }}
|
||||
command:
|
||||
@ -68,9 +73,15 @@ spec:
|
||||
args:
|
||||
- node
|
||||
- --endpoint=$(CSI_ENDPOINT)
|
||||
{{- with .Values.node.reservedVolumeAttachments }}
|
||||
- --reserved-volume-attachments={{ . }}
|
||||
{{- end }}
|
||||
{{- with .Values.node.volumeAttachLimit }}
|
||||
- --volume-attach-limit={{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.node.legacyXFS }}
|
||||
- --legacy-xfs=true
|
||||
{{- end}}
|
||||
{{- with .Values.node.loggingFormat }}
|
||||
- --logging-format={{ . }}
|
||||
{{- end }}
|
||||
@ -100,6 +111,10 @@ spec:
|
||||
value: {{ .otelServiceName }}
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: {{ .otelExporterEndpoint }}
|
||||
{{- if .Values.fips }}
|
||||
- name: AWS_USE_FIPS_ENDPOINT
|
||||
value: "true"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.node.env }}
|
||||
{{- . | toYaml | nindent 12 }}
|
||||
|
@ -1,5 +1,5 @@
|
||||
{{- define "node" }}
|
||||
{{- if or (eq (default true .Values.node.enableLinux) true) }}
|
||||
{{- if .Values.node.enableLinux }}
|
||||
---
|
||||
kind: DaemonSet
|
||||
apiVersion: apps/v1
|
||||
@ -53,17 +53,20 @@ spec:
|
||||
{{- with .Values.node.tolerations }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
- key: "ebs.csi.aws.com/agent-not-ready"
|
||||
operator: "Exists"
|
||||
{{- include "aws-ebs-csi-driver.daemonset-tolerations" . | nindent 8 }}
|
||||
{{- end }}
|
||||
hostNetwork: {{ .Values.node.hostNetwork }}
|
||||
{{- with .Values.node.securityContext }}
|
||||
securityContext:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.node.initContainers }}
|
||||
initContainers:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: ebs-plugin
|
||||
image: {{ printf "%s%s:%s" (default "" .Values.image.containerRegistry) .Values.image.repository (default (printf "v%s" .Chart.AppVersion) (toString .Values.image.tag)) }}
|
||||
image: {{ include "aws-ebs-csi-driver.fullImagePath" $ }}
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
args:
|
||||
- node
|
||||
@ -71,9 +74,18 @@ spec:
|
||||
{{- with .Values.node.reservedVolumeAttachments }}
|
||||
- --reserved-volume-attachments={{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.node.enableMetrics }}
|
||||
- --http-endpoint=0.0.0.0:3302
|
||||
{{- end}}
|
||||
{{- with .Values.node.kubeletPath }}
|
||||
- --csi-mount-point-prefix={{ . }}/plugins/kubernetes.io/csi/ebs.csi.aws.com/
|
||||
{{- end}}
|
||||
{{- with .Values.node.volumeAttachLimit }}
|
||||
- --volume-attach-limit={{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.node.legacyXFS }}
|
||||
- --legacy-xfs=true
|
||||
{{- end}}
|
||||
{{- with .Values.node.loggingFormat }}
|
||||
- --logging-format={{ . }}
|
||||
{{- end }}
|
||||
@ -100,6 +112,10 @@ spec:
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: {{ .otelExporterEndpoint }}
|
||||
{{- end }}
|
||||
{{- if .Values.fips }}
|
||||
- name: AWS_USE_FIPS_ENDPOINT
|
||||
value: "true"
|
||||
{{- end }}
|
||||
{{- with .Values.node.env }}
|
||||
{{- . | toYaml | nindent 12 }}
|
||||
{{- end }}
|
||||
@ -115,6 +131,14 @@ spec:
|
||||
mountPath: /csi
|
||||
- name: device-dir
|
||||
mountPath: /dev
|
||||
{{- if .Values.node.selinux }}
|
||||
- name: selinux-sysfs
|
||||
mountPath: /sys/fs/selinux
|
||||
readOnly: true
|
||||
- name: selinux-config
|
||||
mountPath: /etc/selinux/config
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- with .Values.node.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
@ -232,6 +256,16 @@ spec:
|
||||
hostPath:
|
||||
path: /dev
|
||||
type: Directory
|
||||
{{- if .Values.node.selinux }}
|
||||
- name: selinux-sysfs
|
||||
hostPath:
|
||||
path: /sys/fs/selinux
|
||||
type: Directory
|
||||
- name: selinux-config
|
||||
hostPath:
|
||||
path: /etc/selinux/config
|
||||
type: File
|
||||
{{- end }}
|
||||
- name: probe-dir
|
||||
{{- if .Values.node.probeDirVolume }}
|
||||
{{- toYaml .Values.node.probeDirVolume | nindent 10 }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
---
|
||||
kind: ClusterRole
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -5,22 +6,23 @@ metadata:
|
||||
name: ebs-external-attacher-role
|
||||
labels:
|
||||
{{- include "aws-ebs-csi-driver.labels" . | nindent 4 }}
|
||||
# Do not modify the rules below manually, see `make update-sidecar-dependencies`
|
||||
# BEGIN AUTOGENERATED RULES
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["persistentvolumes"]
|
||||
verbs: [ "get", "list", "watch", "update", "patch" ]
|
||||
- apiGroups: [ "" ]
|
||||
resources: [ "nodes" ]
|
||||
verbs: [ "get", "list", "watch" ]
|
||||
- apiGroups: [ "csi.storage.k8s.io" ]
|
||||
resources: [ "csinodeinfos" ]
|
||||
verbs: ["get", "list", "watch", "patch"]
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["csinodes"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["volumeattachments"]
|
||||
verbs: [ "get", "list", "watch", "update", "patch" ]
|
||||
verbs: ["get", "list", "watch", "patch"]
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["volumeattachments/status"]
|
||||
verbs: ["patch"]
|
||||
# END AUTOGENERATED RULES
|
||||
{{- with .Values.sidecars.attacher.additionalClusterRoleRules }}
|
||||
{{- . | toYaml | nindent 2 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
---
|
||||
kind: ClusterRole
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -5,7 +6,14 @@ metadata:
|
||||
name: ebs-external-provisioner-role
|
||||
labels:
|
||||
{{- include "aws-ebs-csi-driver.labels" . | nindent 4 }}
|
||||
# Do not modify the rules below manually, see `make update-sidecar-dependencies`
|
||||
# BEGIN AUTOGENERATED RULES
|
||||
rules:
|
||||
# The following rule should be uncommented for plugins that require secrets
|
||||
# for provisioning.
|
||||
# - apiGroups: [""]
|
||||
# resources: ["secrets"]
|
||||
# verbs: ["get", "list"]
|
||||
- apiGroups: [""]
|
||||
resources: ["persistentvolumes"]
|
||||
verbs: ["get", "list", "watch", "create", "patch", "delete"]
|
||||
@ -30,12 +38,19 @@ rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
# Access to volumeattachments is only needed when the CSI driver
|
||||
# has the PUBLISH_UNPUBLISH_VOLUME controller capability.
|
||||
# In that case, external-provisioner will watch volumeattachments
|
||||
# to determine when it is safe to delete a volume.
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["volumeattachments"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
# END AUTOGENERATED RULES
|
||||
# Extra rule: VAC rules not present in upstream example
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["volumeattributesclasses"]
|
||||
verbs: ["get"]
|
||||
{{- with .Values.sidecars.provisioner.additionalClusterRoleRules }}
|
||||
{{- . | toYaml | nindent 2 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
---
|
||||
kind: ClusterRole
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -5,6 +6,8 @@ metadata:
|
||||
name: ebs-external-resizer-role
|
||||
labels:
|
||||
{{- include "aws-ebs-csi-driver.labels" . | nindent 4 }}
|
||||
# Do not modify the rules below manually, see `make update-sidecar-dependencies`
|
||||
# BEGIN AUTOGENERATED RULES
|
||||
rules:
|
||||
# The following rule should be uncommented for plugins that require secrets
|
||||
# for provisioning.
|
||||
@ -13,25 +16,25 @@ rules:
|
||||
# verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources: ["persistentvolumes"]
|
||||
verbs: [ "get", "list", "watch", "update", "patch" ]
|
||||
verbs: ["get", "list", "watch", "patch"]
|
||||
- apiGroups: [""]
|
||||
resources: ["persistentvolumeclaims"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources: [ "persistentvolumeclaims/status" ]
|
||||
verbs: [ "update", "patch" ]
|
||||
- apiGroups: [ "storage.k8s.io" ]
|
||||
resources: [ "storageclasses" ]
|
||||
resources: ["pods"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources: ["persistentvolumeclaims/status"]
|
||||
verbs: ["patch"]
|
||||
- apiGroups: [""]
|
||||
resources: ["events"]
|
||||
verbs: ["list", "watch", "create", "update", "patch"]
|
||||
- apiGroups: [ "" ]
|
||||
resources: [ "pods" ]
|
||||
verbs: [ "get", "list", "watch" ]
|
||||
# only required if enabling the alpha volume modify feature
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources: ["volumeattributesclasses"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
# END AUTOGENERATED RULES
|
||||
{{- with .Values.sidecars.resizer.additionalClusterRoleRules }}
|
||||
{{- . | toYaml | nindent 2 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
---
|
||||
kind: ClusterRole
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -5,6 +6,8 @@ metadata:
|
||||
name: ebs-external-snapshotter-role
|
||||
labels:
|
||||
{{- include "aws-ebs-csi-driver.labels" . | nindent 4 }}
|
||||
# Do not modify the rules below manually, see `make update-sidecar-dependencies`
|
||||
# BEGIN AUTOGENERATED RULES
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["events"]
|
||||
@ -21,10 +24,21 @@ rules:
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["snapshot.storage.k8s.io"]
|
||||
resources: ["volumesnapshotcontents"]
|
||||
verbs: [ "create", "get", "list", "watch", "update", "delete", "patch" ]
|
||||
verbs: ["get", "list", "watch", "update", "patch"]
|
||||
- apiGroups: ["snapshot.storage.k8s.io"]
|
||||
resources: ["volumesnapshotcontents/status"]
|
||||
verbs: ["update", "patch"]
|
||||
- apiGroups: ["groupsnapshot.storage.k8s.io"]
|
||||
resources: ["volumegroupsnapshotclasses"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["groupsnapshot.storage.k8s.io"]
|
||||
resources: ["volumegroupsnapshotcontents"]
|
||||
verbs: ["get", "list", "watch", "update", "patch"]
|
||||
- apiGroups: ["groupsnapshot.storage.k8s.io"]
|
||||
resources: ["volumegroupsnapshotcontents/status"]
|
||||
verbs: ["update", "patch"]
|
||||
# END AUTOGENERATED RULES
|
||||
{{- with .Values.sidecars.snapshotter.additionalClusterRoleRules }}
|
||||
{{- . | toYaml | nindent 2 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -13,3 +14,4 @@ roleRef:
|
||||
kind: ClusterRole
|
||||
name: ebs-external-attacher-role
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -13,3 +14,4 @@ roleRef:
|
||||
kind: ClusterRole
|
||||
name: ebs-external-provisioner-role
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -13,3 +14,4 @@ roleRef:
|
||||
kind: ClusterRole
|
||||
name: ebs-external-resizer-role
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
@ -13,3 +14,4 @@ roleRef:
|
||||
kind: ClusterRole
|
||||
name: ebs-external-snapshotter-role
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
{{- end -}}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
# Controller Service
|
||||
kind: Deployment
|
||||
apiVersion: apps/v1
|
||||
@ -70,14 +71,10 @@ spec:
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: ebs-plugin
|
||||
image: {{ printf "%s%s:%s" (default "" .Values.image.containerRegistry) .Values.image.repository (default (printf "v%s" .Chart.AppVersion) (.Values.image.tag | toString)) }}
|
||||
image: {{ include "aws-ebs-csi-driver.fullImagePath" $ }}
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
args:
|
||||
{{- if ne .Release.Name "kustomize" }}
|
||||
- controller
|
||||
{{- else }}
|
||||
# - {all,controller,node} # specify the driver mode
|
||||
{{- end }}
|
||||
- --endpoint=$(CSI_ENDPOINT)
|
||||
{{- if .Values.controller.extraVolumeTags }}
|
||||
{{- include "aws-ebs-csi-driver.extra-volume-tags" . | nindent 12 }}
|
||||
@ -153,6 +150,10 @@ spec:
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: {{ .otelExporterEndpoint }}
|
||||
{{- end }}
|
||||
{{- if .Values.fips }}
|
||||
- name: AWS_USE_FIPS_ENDPOINT
|
||||
value: "true"
|
||||
{{- end }}
|
||||
{{- with .Values.controller.envFrom }}
|
||||
envFrom:
|
||||
{{- . | toYaml | nindent 12 }}
|
||||
@ -230,6 +231,9 @@ spec:
|
||||
{{- if not (regexMatch "(-retry-interval-max)" (join " " .Values.sidecars.provisioner.additionalArgs)) }}
|
||||
- --retry-interval-max=30m
|
||||
{{- end }}
|
||||
{{- if .Capabilities.APIVersions.Has "storage.k8s.io/v1beta1/VolumeAttributesClass" }}
|
||||
- --feature-gates=VolumeAttributesClass=true
|
||||
{{- end }}
|
||||
{{- range .Values.sidecars.provisioner.additionalArgs }}
|
||||
- {{ . }}
|
||||
{{- end }}
|
||||
@ -320,6 +324,7 @@ spec:
|
||||
args:
|
||||
- --csi-address=$(ADDRESS)
|
||||
- --leader-election=true
|
||||
- --v={{ .Values.sidecars.snapshotter.logLevel }}
|
||||
{{- if .Values.controller.extraCreateMetadata }}
|
||||
- --extra-create-metadata
|
||||
{{- end}}
|
||||
@ -447,6 +452,9 @@ spec:
|
||||
{{- if not (regexMatch "(-retry-interval-max)" (join " " .Values.sidecars.resizer.additionalArgs)) }}
|
||||
- --retry-interval-max=30m
|
||||
{{- end }}
|
||||
{{- if .Capabilities.APIVersions.Has "storage.k8s.io/v1beta1/VolumeAttributesClass" }}
|
||||
- --feature-gates=VolumeAttributesClass=true
|
||||
{{- end }}
|
||||
{{- range .Values.sidecars.resizer.additionalArgs }}
|
||||
- {{ . }}
|
||||
{{- end }}
|
||||
@ -515,5 +523,6 @@ spec:
|
||||
{{- end }}
|
||||
{{- if .Values.controller.dnsConfig }}
|
||||
dnsConfig:
|
||||
{{- toYaml .Values.controller.dnsConfig | nindent 4 }}
|
||||
{{- toYaml .Values.controller.dnsConfig | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
apiVersion: {{ ternary "storage.k8s.io/v1" "storage.k8s.io/v1beta1" (semverCompare ">=1.18.0-0" .Capabilities.KubeVersion.Version) }}
|
||||
kind: CSIDriver
|
||||
metadata:
|
||||
@ -10,3 +11,4 @@ spec:
|
||||
{{- if not .Values.useOldCSIDriver }}
|
||||
fsGroupPolicy: File
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
{{- if .Values.defaultStorageClass.enabled }}
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
@ -9,3 +10,4 @@ provisioner: ebs.csi.aws.com
|
||||
volumeBindingMode: WaitForFirstConsumer
|
||||
allowVolumeExpansion: true
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,4 +1,4 @@
|
||||
{{- if .Values.controller.enableMetrics -}}
|
||||
{{- if and .Values.controller.enableMetrics (not .Values.nodeComponentOnly) -}}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
@ -40,3 +40,21 @@ spec:
|
||||
interval: {{ .Values.controller.serviceMonitor.interval | default "15s"}}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- if .Values.node.enableMetrics }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ebs-csi-node
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app: ebs-csi-node
|
||||
spec:
|
||||
selector:
|
||||
app: ebs-csi-node
|
||||
ports:
|
||||
- name: metrics
|
||||
port: 3302
|
||||
targetPort: 3302
|
||||
type: ClusterIP
|
||||
{{- end }}
|
||||
|
@ -12,6 +12,9 @@
|
||||
{{- include "node" (deepCopy $ | mustMerge $args) -}}
|
||||
{{- end }}
|
||||
{{- if .Values.a1CompatibilityDaemonSet }}
|
||||
{{- if .Values.fips -}}
|
||||
{{- fail "FIPS mode not supported for A1 instance family compatibility image" -}}
|
||||
{{- end -}}
|
||||
{{$args := dict
|
||||
"NodeName" "ebs-csi-node-a1compat"
|
||||
"Values" (dict
|
||||
@ -27,7 +30,7 @@
|
||||
(dict
|
||||
"key" "eks.amazonaws.com/compute-type"
|
||||
"operator" "NotIn"
|
||||
"values" (list "fargate")
|
||||
"values" (list "fargate" "auto" "hybrid")
|
||||
)
|
||||
(dict
|
||||
"key" "node.kubernetes.io/instance-type"
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if and .Values.controller.podDisruptionBudget.enabled (not .Values.nodeComponentOnly) -}}
|
||||
apiVersion: policy/v1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
@ -10,8 +11,12 @@ spec:
|
||||
matchLabels:
|
||||
app: ebs-csi-controller
|
||||
{{- include "aws-ebs-csi-driver.selectorLabels" . | nindent 6 }}
|
||||
{{- if .Values.controller.podDisruptionBudget.unhealthyPodEvictionPolicy }}
|
||||
unhealthyPodEvictionPolicy: {{ .Values.controller.podDisruptionBudget.unhealthyPodEvictionPolicy }}
|
||||
{{- end }}
|
||||
{{- if le (.Values.controller.replicaCount | int) 2 }}
|
||||
maxUnavailable: 1
|
||||
{{- else }}
|
||||
minAvailable: 2
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
kind: Role
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
@ -9,3 +10,4 @@ rules:
|
||||
- apiGroups: ["coordination.k8s.io"]
|
||||
resources: ["leases"]
|
||||
verbs: ["get", "watch", "list", "delete", "update", "create"]
|
||||
{{- end }}
|
||||
|
@ -1,3 +1,4 @@
|
||||
{{- if not .Values.nodeComponentOnly -}}
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
@ -13,3 +14,4 @@ roleRef:
|
||||
kind: Role
|
||||
name: ebs-csi-leases-role
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
{{- end }}
|
||||
|
@ -1,4 +1,4 @@
|
||||
{{- if .Values.controller.serviceAccount.create -}}
|
||||
{{- if and .Values.controller.serviceAccount.create (not .Values.nodeComponentOnly) -}}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
@ -8,12 +8,7 @@ metadata:
|
||||
{{- include "aws-ebs-csi-driver.labels" . | nindent 4 }}
|
||||
{{- with .Values.controller.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- if eq .Release.Name "kustomize" }}
|
||||
#Enable if EKS IAM roles for service accounts (IRSA) is used. See https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html for details.
|
||||
#annotations:
|
||||
# eks.amazonaws.com/role-arn: arn:<partition>:iam::<account>:role/ebs-csi-role
|
||||
{{- tpl (toYaml .) $ | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- if .Values.controller.serviceAccount.automountServiceAccountToken }}
|
||||
automountServiceAccountToken: {{ .Values.controller.serviceAccount.automountServiceAccountToken }}
|
||||
|
@ -1,4 +1,4 @@
|
||||
{{- if .Values.helmTester.enabled -}}
|
||||
{{- if and .Values.helmTester.enabled (not .Values.nodeComponentOnly) -}}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
@ -220,15 +220,28 @@ spec:
|
||||
else
|
||||
FOCUS_REGEX="${FOCUS_REGEX})"
|
||||
fi
|
||||
export KUBE_VERSION=$(kubectl version --output json | jq -r '.serverVersion.major + "." + .serverVersion.minor')
|
||||
kubetest2 noop --run-id='e2e-kubernetes' --test=ginkgo -- --test-package-version="$(curl -L https://dl.k8s.io/release/stable-${KUBE_VERSION}.txt)" --skip-regex='[Disruptive]|[Serial]' --focus-regex="$FOCUS_REGEX" --parallel=25 --test-args='-storage.testdriver=/etc/config/manifests.yaml'
|
||||
|
||||
echo "Detecting Kubernetes server version"
|
||||
export KUBE_VERSION=$(kubectl version --output json | jq -r '.serverVersion.major + "." + .serverVersion.minor' | sed 's/[^0-9.]*$//')
|
||||
echo "Detected KUBE_VERSION=${KUBE_VERSION}"
|
||||
|
||||
echo "Fetching the stable test package version for KUBE_VERSION=${KUBE_VERSION}"
|
||||
test_package_version=$(curl -L https://dl.k8s.io/release/stable-${KUBE_VERSION}.txt 2>/dev/null)
|
||||
|
||||
if echo "$test_package_version" | grep -q "Error"; then
|
||||
echo "Error: Failed to fetch test package version for KUBE_VERSION=${KUBE_VERSION}. Exiting."
|
||||
exit 1
|
||||
fi
|
||||
echo "Fetched test package version ${test_package_version}"
|
||||
|
||||
echo "Starting kubetest2 with ginkgo tests..."
|
||||
kubetest2 noop --run-id='e2e-kubernetes' --test=ginkgo -- --test-package-version="$test_package_version" --skip-regex='[Disruptive]|[Serial]' --focus-regex="$FOCUS_REGEX" --parallel=25 --test-args='-storage.testdriver=/etc/config/manifests.yaml'
|
||||
echo "kubetest2 test run completed."
|
||||
volumeMounts:
|
||||
- name: config-vol
|
||||
mountPath: /etc/config
|
||||
# kubekins-e2e v1 image is linux amd64 only.
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
kubernetes.io/arch: amd64
|
||||
serviceAccountName: ebs-csi-driver-test
|
||||
volumes:
|
||||
- name: config-vol
|
||||
|
@ -11,13 +11,18 @@ image:
|
||||
customLabels: {}
|
||||
# k8s-app: aws-ebs-csi-driver
|
||||
|
||||
# Instruct the AWS SDK to use AWS FIPS endpoints, and deploy container built with BoringCrypto (a FIPS-validated cryptographic library) instead of the Go default
|
||||
#
|
||||
# The EBS CSI Driver FIPS images have not undergone FIPS certification, and no official guarantee is made about the compliance of these images under the FIPS standard
|
||||
# Users relying on these images for FIPS compliance should perform their own independent evaluation
|
||||
fips: false
|
||||
sidecars:
|
||||
provisioner:
|
||||
env: []
|
||||
image:
|
||||
pullPolicy: IfNotPresent
|
||||
repository: public.ecr.aws/eks-distro/kubernetes-csi/external-provisioner
|
||||
tag: "v5.0.1-eks-1-30-8"
|
||||
tag: "v5.1.0-eks-1-31-12"
|
||||
logLevel: 2
|
||||
# Additional parameters provided by external-provisioner.
|
||||
additionalArgs: []
|
||||
@ -44,7 +49,7 @@ sidecars:
|
||||
image:
|
||||
pullPolicy: IfNotPresent
|
||||
repository: public.ecr.aws/eks-distro/kubernetes-csi/external-attacher
|
||||
tag: "v4.6.1-eks-1-30-8"
|
||||
tag: "v4.8.0-eks-1-31-12"
|
||||
# Tune leader lease election for csi-attacher.
|
||||
# Leader election is on by default.
|
||||
leaderElection:
|
||||
@ -73,7 +78,7 @@ sidecars:
|
||||
image:
|
||||
pullPolicy: IfNotPresent
|
||||
repository: public.ecr.aws/eks-distro/kubernetes-csi/external-snapshotter/csi-snapshotter
|
||||
tag: "v8.0.1-eks-1-30-8"
|
||||
tag: "v8.2.0-eks-1-31-12"
|
||||
logLevel: 2
|
||||
# Additional parameters provided by csi-snapshotter.
|
||||
additionalArgs: []
|
||||
@ -89,7 +94,7 @@ sidecars:
|
||||
image:
|
||||
pullPolicy: IfNotPresent
|
||||
repository: public.ecr.aws/eks-distro/kubernetes-csi/livenessprobe
|
||||
tag: "v2.13.0-eks-1-30-8"
|
||||
tag: "v2.14.0-eks-1-31-12"
|
||||
# Additional parameters provided by livenessprobe.
|
||||
additionalArgs: []
|
||||
resources: {}
|
||||
@ -101,7 +106,7 @@ sidecars:
|
||||
image:
|
||||
pullPolicy: IfNotPresent
|
||||
repository: public.ecr.aws/eks-distro/kubernetes-csi/external-resizer
|
||||
tag: "v1.11.1-eks-1-30-8"
|
||||
tag: "v1.12.0-eks-1-31-11"
|
||||
# Tune leader lease election for csi-resizer.
|
||||
# Leader election is on by default.
|
||||
leaderElection:
|
||||
@ -128,7 +133,7 @@ sidecars:
|
||||
image:
|
||||
pullPolicy: IfNotPresent
|
||||
repository: public.ecr.aws/eks-distro/kubernetes-csi/node-driver-registrar
|
||||
tag: "v2.11.0-eks-1-30-8"
|
||||
tag: "v2.13.0-eks-1-31-12"
|
||||
logLevel: 2
|
||||
# Additional parameters provided by node-driver-registrar.
|
||||
additionalArgs: []
|
||||
@ -150,7 +155,7 @@ sidecars:
|
||||
image:
|
||||
pullPolicy: IfNotPresent
|
||||
repository: public.ecr.aws/ebs-csi-driver/volume-modifier-for-k8s
|
||||
tag: "v0.3.0"
|
||||
tag: "v0.5.1"
|
||||
leaderElection:
|
||||
enabled: true
|
||||
# Optional values to tune lease behavior.
|
||||
@ -196,6 +201,8 @@ controller:
|
||||
operator: NotIn
|
||||
values:
|
||||
- fargate
|
||||
- auto
|
||||
- hybrid
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
@ -245,6 +252,11 @@ controller:
|
||||
deploymentAnnotations: {}
|
||||
podAnnotations: {}
|
||||
podLabels: {}
|
||||
podDisruptionBudget:
|
||||
# Warning: Disabling PodDisruptionBudget may lead to delays in stateful workloads starting due to controller
|
||||
# pod restarts or evictions.
|
||||
enabled: true
|
||||
# unhealthyPodEvictionPolicy:
|
||||
priorityClassName: system-cluster-critical
|
||||
# AWS region to use. If not specified then the region will be looked up via the AWS EC2 metadata
|
||||
# service.
|
||||
@ -335,11 +347,15 @@ controller:
|
||||
# Enable dnsConfig for the controller and node pods
|
||||
dnsConfig: {}
|
||||
node:
|
||||
# Enable SELinux-only optimizations on the EBS CSI Driver node pods
|
||||
# Must only be set true if all linux nodes in the DaemonSet have SELinux enabled
|
||||
selinux: false
|
||||
env: []
|
||||
envFrom: []
|
||||
kubeletPath: /var/lib/kubelet
|
||||
loggingFormat: text
|
||||
logLevel: 2
|
||||
enableMetrics: false
|
||||
priorityClassName:
|
||||
additionalArgs: []
|
||||
affinity:
|
||||
@ -351,6 +367,8 @@ node:
|
||||
operator: NotIn
|
||||
values:
|
||||
- fargate
|
||||
- auto
|
||||
- hybrid
|
||||
- key: node.kubernetes.io/instance-type
|
||||
operator: NotIn
|
||||
values:
|
||||
@ -388,6 +406,10 @@ node:
|
||||
# Enable the linux daemonset creation
|
||||
enableLinux: true
|
||||
enableWindows: false
|
||||
# Warning: This option will be removed in a future release. It is a temporary workaround for users unable to immediately migrate off of older kernel versions.
|
||||
# Formats XFS volumes with bigtime=0,inobtcount=0,reflink=0, for mounting onto nodes with linux kernel version <= 5.4.
|
||||
# Note that XFS volumes formatted with this option will only have timestamp records until 2038.
|
||||
legacyXFS: false
|
||||
# The number of attachment slots to reserve for system use (and not to be used for CSI volumes)
|
||||
# When this parameter is not specified (or set to -1), the EBS CSI Driver will attempt to determine the number of reserved slots via heuristic
|
||||
# Cannot be specified at the same time as `node.volumeAttachLimit`
|
||||
@ -426,6 +448,14 @@ node:
|
||||
containerSecurityContext:
|
||||
readOnlyRootFilesystem: true
|
||||
privileged: true
|
||||
initContainers: []
|
||||
# containers to be run before the csi-node's container starts.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# - name: wait
|
||||
# image: busybox
|
||||
# command: [ 'sh', '-c', "sleep 20" ]
|
||||
# Enable opentelemetry tracing for the plugin running on the daemonset
|
||||
otelTracing: {}
|
||||
# otelServiceName: ebs-csi-node
|
||||
@ -476,7 +506,9 @@ volumeSnapshotClasses: []
|
||||
# Intended for use with older clusters that cannot easily replace the CSIDriver object
|
||||
# This parameter should always be false for new installations
|
||||
useOldCSIDriver: false
|
||||
# Deploy EBS CSI Driver without controller and associated resources
|
||||
nodeComponentOnly: false
|
||||
helmTester:
|
||||
enabled: true
|
||||
# Supply a custom image to the ebs-csi-driver-test pod in helm-tester.yaml
|
||||
image: "gcr.io/k8s-staging-test-infra/kubekins-e2e:v20240611-597c402033-master"
|
||||
image: "us-central1-docker.pkg.dev/k8s-staging-test-infra/images/kubekins-e2e:v20241230-3006692a6f-master"
|
||||
|
@ -1,4 +1,24 @@
|
||||
# Helm chart
|
||||
# v3.1.6
|
||||
* Bump app/driver version to `v2.1.5`
|
||||
# v3.1.5
|
||||
* Bump app/driver version to `v2.1.4`
|
||||
# v3.1.4
|
||||
* Bump app/driver version to `v2.1.3`
|
||||
# v3.1.3
|
||||
* Bump app/driver version to `v2.1.2`
|
||||
# v3.1.2
|
||||
* Bump app/driver version to `v2.1.1`
|
||||
# v3.1.1
|
||||
* Bump app/driver version to `v2.1.0`
|
||||
# v3.1.0
|
||||
* Bump app/driver version to `v2.0.9`
|
||||
# v3.0.9
|
||||
* Bump app/driver version to `v2.0.8`
|
||||
# v3.0.8
|
||||
* Bump app/driver version to `v2.0.7`
|
||||
# v3.0.7
|
||||
* Bump app/driver version to `v2.0.6`
|
||||
# v3.0.6
|
||||
* Bump app/driver version to `v2.0.5`
|
||||
# v3.0.5
|
||||
|
@ -1,5 +1,5 @@
|
||||
apiVersion: v2
|
||||
appVersion: 2.0.5
|
||||
appVersion: 2.1.5
|
||||
description: A Helm chart for AWS EFS CSI Driver
|
||||
home: https://github.com/kubernetes-sigs/aws-efs-csi-driver
|
||||
keywords:
|
||||
@ -15,4 +15,4 @@ maintainers:
|
||||
name: aws-efs-csi-driver
|
||||
sources:
|
||||
- https://github.com/kubernetes-sigs/aws-efs-csi-driver
|
||||
version: 3.0.6
|
||||
version: 3.1.6
|
||||
|
@ -3,17 +3,18 @@
|
||||
kind: Deployment
|
||||
apiVersion: apps/v1
|
||||
metadata:
|
||||
name: efs-csi-controller
|
||||
name: {{ .Values.controller.name }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "aws-efs-csi-driver.name" . }}
|
||||
{{- with .Values.controller.additionalLabels }}
|
||||
{{ toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
replicas: {{ .Values.controller.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: efs-csi-controller
|
||||
app: {{ .Values.controller.name }}
|
||||
app.kubernetes.io/name: {{ include "aws-efs-csi-driver.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- with .Values.controller.updateStrategy }}
|
||||
@ -23,7 +24,7 @@ spec:
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: efs-csi-controller
|
||||
app: {{ .Values.controller.name }}
|
||||
app.kubernetes.io/name: {{ include "aws-efs-csi-driver.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- with .Values.controller.podLabels }}
|
||||
@ -93,6 +94,8 @@ spec:
|
||||
- name: AWS_USE_FIPS_ENDPOINT
|
||||
value: "true"
|
||||
{{- end }}
|
||||
- name: PORT_RANGE_UPPER_BOUND
|
||||
value: "{{ .Values.portRangeUpperBound }}"
|
||||
{{- with .Values.controller.env }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
@ -134,13 +137,16 @@ spec:
|
||||
{{- if hasKey .Values.controller "leaderElectionLeaseDuration" }}
|
||||
- --leader-election-lease-duration={{ .Values.controller.leaderElectionLeaseDuration }}
|
||||
{{- end }}
|
||||
{{- range .Values.sidecars.csiProvisioner.additionalArgs }}
|
||||
- {{ . }}
|
||||
{{- end }}
|
||||
env:
|
||||
- name: ADDRESS
|
||||
value: /var/lib/csi/sockets/pluginproxy/csi.sock
|
||||
volumeMounts:
|
||||
- name: socket-dir
|
||||
mountPath: /var/lib/csi/sockets/pluginproxy/
|
||||
{{- with .Values.sidecars.csiProvisioner.resources }}
|
||||
{{- with default .Values.controller.resources .Values.sidecars.csiProvisioner.resources }}
|
||||
resources: {{ toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.sidecars.csiProvisioner.securityContext }}
|
||||
@ -159,7 +165,7 @@ spec:
|
||||
{{- with .Values.controller.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.sidecars.livenessProbe.resources }}
|
||||
{{- with default .Values.controller.resources .Values.sidecars.livenessProbe.resources }}
|
||||
resources: {{ toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.sidecars.livenessProbe.securityContext }}
|
||||
@ -175,4 +181,13 @@ spec:
|
||||
{{- with .Values.controller.affinity }}
|
||||
affinity: {{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.controller.topologySpreadConstraints }}
|
||||
{{- $tscLabelSelector := dict "labelSelector" ( dict "matchLabels" ( dict "app" "efs-csi-controller" ) ) }}
|
||||
{{- $constraints := list }}
|
||||
{{- range .Values.controller.topologySpreadConstraints }}
|
||||
{{- $constraints = mustAppend $constraints (mergeOverwrite . $tscLabelSelector) }}
|
||||
{{- end }}
|
||||
topologySpreadConstraints:
|
||||
{{- $constraints | toYaml | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user